path: root/lib
author     Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:52:30 +0100
committer  Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:53:17 +0100
commit     b63264c8342e6a1b6971c79550d2af2024b6a4de (patch)
tree       83114aac64286fe616506c0b3dfaec2ab86ef835 /lib
parent     ca33590b6af032bff57d9cc70455660466a654b2 (diff)

New upstream version 18.08 (upstream/18.08)

Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile44
-rw-r--r--lib/librte_bbdev/rte_bbdev.c17
-rw-r--r--lib/librte_bbdev/rte_bbdev.h8
-rw-r--r--lib/librte_bbdev/rte_bbdev_op.h28
-rw-r--r--lib/librte_bitratestats/meson.build1
-rw-r--r--lib/librte_bitratestats/rte_bitrate.c6
-rw-r--r--lib/librte_bpf/Makefile41
-rw-r--r--lib/librte_bpf/bpf.c61
-rw-r--r--lib/librte_bpf/bpf_def.h143
-rw-r--r--lib/librte_bpf/bpf_exec.c453
-rw-r--r--lib/librte_bpf/bpf_impl.h55
-rw-r--r--lib/librte_bpf/bpf_jit_x86.c1356
-rw-r--r--lib/librte_bpf/bpf_load.c148
-rw-r--r--lib/librte_bpf/bpf_load_elf.c322
-rw-r--r--lib/librte_bpf/bpf_pkt.c605
-rw-r--r--lib/librte_bpf/bpf_validate.c2248
-rw-r--r--lib/librte_bpf/meson.build25
-rw-r--r--lib/librte_bpf/rte_bpf.h203
-rw-r--r--lib/librte_bpf/rte_bpf_ethdev.h117
-rw-r--r--lib/librte_bpf/rte_bpf_version.map16
-rw-r--r--lib/librte_cmdline/cmdline_parse.c13
-rw-r--r--lib/librte_cmdline/cmdline_parse_etheraddr.c2
-rw-r--r--lib/librte_cmdline/cmdline_parse_ipaddr.c223
-rw-r--r--lib/librte_cmdline/cmdline_parse_portlist.c2
-rw-r--r--lib/librte_cmdline/cmdline_parse_string.c4
-rw-r--r--lib/librte_compat/Makefile33
-rw-r--r--lib/librte_compressdev/Makefile31
-rw-r--r--lib/librte_compressdev/meson.build12
-rw-r--r--lib/librte_compressdev/rte_comp.c215
-rw-r--r--lib/librte_compressdev/rte_comp.h485
-rw-r--r--lib/librte_compressdev/rte_compressdev.c772
-rw-r--r--lib/librte_compressdev/rte_compressdev.h540
-rw-r--r--lib/librte_compressdev/rte_compressdev_internal.h114
-rw-r--r--lib/librte_compressdev/rte_compressdev_pmd.c160
-rw-r--r--lib/librte_compressdev/rte_compressdev_pmd.h390
-rw-r--r--lib/librte_compressdev/rte_compressdev_version.map39
-rw-r--r--lib/librte_cryptodev/Makefile1
-rw-r--r--lib/librte_cryptodev/meson.build5
-rw-r--r--lib/librte_cryptodev/rte_crypto.h82
-rw-r--r--lib/librte_cryptodev/rte_crypto_asym.h496
-rw-r--r--lib/librte_cryptodev/rte_crypto_sym.h17
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.c430
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.h407
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.c12
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.h157
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_version.map33
-rw-r--r--lib/librte_eal/bsdapp/Makefile2
-rw-r--r--lib/librte_eal/bsdapp/contigmem/BSDmakefile8
-rw-r--r--lib/librte_eal/bsdapp/contigmem/Makefile24
-rw-r--r--lib/librte_eal/bsdapp/contigmem/contigmem.c353
-rw-r--r--lib/librte_eal/bsdapp/contigmem/meson.build4
-rw-r--r--lib/librte_eal/bsdapp/eal/Makefile11
-rw-r--r--lib/librte_eal/bsdapp/eal/eal.c290
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_alarm.c299
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_alarm_private.h19
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_cpuflags.c21
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_dev.c21
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_hugepage_info.c69
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_interrupts.c464
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memalloc.c54
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memory.c471
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_thread.c2
-rw-r--r--lib/librte_eal/bsdapp/eal/meson.build5
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/BSDmakefile8
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/Makefile24
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/meson.build4
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/nic_uio.c350
-rw-r--r--lib/librte_eal/common/Makefile4
-rw-r--r--lib/librte_eal/common/arch/arm/rte_cpuflags.c54
-rw-r--r--lib/librte_eal/common/arch/arm/rte_hypervisor.c2
-rw-r--r--lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c15
-rw-r--r--lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c2
-rw-r--r--lib/librte_eal/common/arch/x86/rte_hypervisor.c2
-rw-r--r--lib/librte_eal/common/eal_common_bus.c3
-rw-r--r--lib/librte_eal/common/eal_common_class.c64
-rw-r--r--lib/librte_eal/common/eal_common_dev.c443
-rw-r--r--lib/librte_eal/common/eal_common_devargs.c228
-rw-r--r--lib/librte_eal/common/eal_common_fbarray.c1239
-rw-r--r--lib/librte_eal/common/eal_common_hypervisor.c2
-rw-r--r--lib/librte_eal/common/eal_common_lcore.c75
-rw-r--r--lib/librte_eal/common/eal_common_log.c121
-rw-r--r--lib/librte_eal/common/eal_common_memalloc.c364
-rw-r--r--lib/librte_eal/common/eal_common_memory.c528
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c290
-rw-r--r--lib/librte_eal/common/eal_common_options.c192
-rw-r--r--lib/librte_eal/common/eal_common_proc.c713
-rw-r--r--lib/librte_eal/common/eal_common_thread.c98
-rw-r--r--lib/librte_eal/common/eal_common_uuid.c193
-rw-r--r--lib/librte_eal/common/eal_filesystem.h70
-rw-r--r--lib/librte_eal/common/eal_hugepages.h11
-rw-r--r--lib/librte_eal/common/eal_internal_cfg.h20
-rw-r--r--lib/librte_eal/common/eal_memalloc.h82
-rw-r--r--lib/librte_eal/common/eal_options.h8
-rw-r--r--lib/librte_eal/common/eal_private.h99
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_atomic.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_atomic_32.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_byteorder.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cpuflags.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles_32.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h32
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_rwlock.h2
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_spinlock.h32
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h23
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic.h24
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_32.h12
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_64.h12
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_memcpy.h24
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_spinlock.h4
-rw-r--r--lib/librte_eal/common/include/generic/rte_atomic.h90
-rw-r--r--lib/librte_eal/common/include/generic/rte_byteorder.h6
-rw-r--r--lib/librte_eal/common/include/generic/rte_cpuflags.h21
-rw-r--r--lib/librte_eal/common/include/generic/rte_rwlock.h4
-rw-r--r--lib/librte_eal/common/include/rte_bitmap.h8
-rw-r--r--lib/librte_eal/common/include/rte_bus.h4
-rw-r--r--lib/librte_eal/common/include/rte_class.h134
-rw-r--r--lib/librte_eal/common/include/rte_common.h160
-rw-r--r--lib/librte_eal/common/include/rte_dev.h211
-rw-r--r--lib/librte_eal/common/include/rte_devargs.h173
-rw-r--r--lib/librte_eal/common/include/rte_eal.h54
-rw-r--r--lib/librte_eal/common/include/rte_eal_interrupts.h1
-rw-r--r--lib/librte_eal/common/include/rte_eal_memconfig.h28
-rw-r--r--lib/librte_eal/common/include/rte_fbarray.h470
-rw-r--r--lib/librte_eal/common/include/rte_hypervisor.h2
-rw-r--r--lib/librte_eal/common/include/rte_lcore.h60
-rw-r--r--lib/librte_eal/common/include/rte_log.h40
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h10
-rw-r--r--lib/librte_eal/common/include/rte_malloc_heap.h6
-rw-r--r--lib/librte_eal/common/include/rte_memory.h330
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h45
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_feature_defs.h58
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_features.h58
-rw-r--r--lib/librte_eal/common/include/rte_random.h6
-rw-r--r--lib/librte_eal/common/include/rte_service.h167
-rw-r--r--lib/librte_eal/common/include/rte_service_component.h38
-rw-r--r--lib/librte_eal/common/include/rte_string_fns.h31
-rw-r--r--lib/librte_eal/common/include/rte_tailq.h3
-rw-r--r--lib/librte_eal/common/include/rte_uuid.h129
-rw-r--r--lib/librte_eal/common/include/rte_version.h2
-rw-r--r--lib/librte_eal/common/include/rte_vfio.h243
-rw-r--r--lib/librte_eal/common/malloc_elem.c479
-rw-r--r--lib/librte_eal/common/malloc_elem.h51
-rw-r--r--lib/librte_eal/common/malloc_heap.c868
-rw-r--r--lib/librte_eal/common/malloc_heap.h19
-rw-r--r--lib/librte_eal/common/malloc_mp.c743
-rw-r--r--lib/librte_eal/common/malloc_mp.h86
-rw-r--r--lib/librte_eal/common/meson.build8
-rw-r--r--lib/librte_eal/common/rte_malloc.c85
-rw-r--r--lib/librte_eal/common/rte_service.c130
-rw-r--r--lib/librte_eal/linuxapp/Makefile2
-rw-r--r--lib/librte_eal/linuxapp/eal/Makefile12
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c269
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_alarm.c9
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_cpuflags.c84
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_dev.c224
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_hugepage_info.c253
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_interrupts.c44
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_log.c13
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memalloc.c1363
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c1520
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_thread.c6
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_timer.c12
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.c1580
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.h60
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c402
-rw-r--r--lib/librte_eal/linuxapp/eal/meson.build4
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/Kbuild1
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/Makefile25
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/compat.h134
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/igb_uio.c643
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/meson.build24
-rw-r--r--lib/librte_eal/linuxapp/kni/Makefile58
-rw-r--r--lib/librte_eal/linuxapp/kni/compat.h106
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/README71
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c3650
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h494
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c1144
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h142
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h1365
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h778
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c894
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h76
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c2081
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h65
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c539
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h74
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c510
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h72
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c950
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h60
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h121
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c3392
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h241
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h631
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h844
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c2842
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c10344
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c832
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h234
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c421
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h31
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h3933
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h910
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c1281
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h29
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c2299
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h43
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c1142
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h153
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c4067
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h125
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h153
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c2886
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h76
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c2951
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h90
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h117
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c1832
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h122
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h3239
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c922
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h43
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c1231
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h3140
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_dev.h106
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_ethtool.c219
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_fifo.h75
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_misc.c663
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_net.c757
-rw-r--r--lib/librte_eal/meson.build27
-rw-r--r--lib/librte_eal/rte_eal_version.map120
-rw-r--r--lib/librte_ethdev/Makefile (renamed from lib/librte_ether/Makefile)2
-rw-r--r--lib/librte_ethdev/ethdev_profile.c (renamed from lib/librte_ether/ethdev_profile.c)0
-rw-r--r--lib/librte_ethdev/ethdev_profile.h (renamed from lib/librte_ether/ethdev_profile.h)0
-rw-r--r--lib/librte_ethdev/meson.build (renamed from lib/librte_ether/meson.build)4
-rw-r--r--lib/librte_ethdev/rte_dev_info.h (renamed from lib/librte_ether/rte_dev_info.h)18
-rw-r--r--lib/librte_ethdev/rte_eth_ctrl.h (renamed from lib/librte_ether/rte_eth_ctrl.h)3
-rw-r--r--lib/librte_ethdev/rte_ethdev.c (renamed from lib/librte_ether/rte_ethdev.c)1429
-rw-r--r--lib/librte_ethdev/rte_ethdev.h (renamed from lib/librte_ether/rte_ethdev.h)396
-rw-r--r--lib/librte_ethdev/rte_ethdev_core.h (renamed from lib/librte_ether/rte_ethdev_core.h)14
-rw-r--r--lib/librte_ethdev/rte_ethdev_driver.h357
-rw-r--r--lib/librte_ethdev/rte_ethdev_pci.h (renamed from lib/librte_ether/rte_ethdev_pci.h)24
-rw-r--r--lib/librte_ethdev/rte_ethdev_vdev.h (renamed from lib/librte_ether/rte_ethdev_vdev.h)0
-rw-r--r--lib/librte_ethdev/rte_ethdev_version.map (renamed from lib/librte_ether/rte_ethdev_version.map)47
-rw-r--r--lib/librte_ethdev/rte_flow.c (renamed from lib/librte_ether/rte_flow.c)311
-rw-r--r--lib/librte_ethdev/rte_flow.h (renamed from lib/librte_ether/rte_flow.h)1003
-rw-r--r--lib/librte_ethdev/rte_flow_driver.h (renamed from lib/librte_ether/rte_flow_driver.h)67
-rw-r--r--lib/librte_ethdev/rte_mtr.c (renamed from lib/librte_ether/rte_mtr.c)0
-rw-r--r--lib/librte_ethdev/rte_mtr.h (renamed from lib/librte_ether/rte_mtr.h)0
-rw-r--r--lib/librte_ethdev/rte_mtr_driver.h (renamed from lib/librte_ether/rte_mtr_driver.h)0
-rw-r--r--lib/librte_ethdev/rte_tm.c (renamed from lib/librte_ether/rte_tm.c)0
-rw-r--r--lib/librte_ethdev/rte_tm.h (renamed from lib/librte_ether/rte_tm.h)59
-rw-r--r--lib/librte_ethdev/rte_tm_driver.h (renamed from lib/librte_ether/rte_tm_driver.h)0
-rw-r--r--lib/librte_ether/rte_ethdev_driver.h132
-rw-r--r--lib/librte_eventdev/Makefile15
-rw-r--r--lib/librte_eventdev/meson.build19
-rw-r--r--lib/librte_eventdev/rte_event_crypto_adapter.c1128
-rw-r--r--lib/librte_eventdev/rte_event_crypto_adapter.h575
-rw-r--r--lib/librte_eventdev/rte_event_eth_rx_adapter.c1662
-rw-r--r--lib/librte_eventdev/rte_event_eth_rx_adapter.h129
-rw-r--r--lib/librte_eventdev/rte_event_ring.c15
-rw-r--r--lib/librte_eventdev/rte_event_ring.h4
-rw-r--r--lib/librte_eventdev/rte_event_timer_adapter.c1299
-rw-r--r--lib/librte_eventdev/rte_event_timer_adapter.h766
-rw-r--r--lib/librte_eventdev/rte_event_timer_adapter_pmd.h114
-rw-r--r--lib/librte_eventdev/rte_eventdev.c79
-rw-r--r--lib/librte_eventdev/rte_eventdev.h167
-rw-r--r--lib/librte_eventdev/rte_eventdev_pmd.h225
-rw-r--r--lib/librte_eventdev/rte_eventdev_version.map39
-rw-r--r--lib/librte_flow_classify/rte_flow_classify.c9
-rw-r--r--lib/librte_flow_classify/rte_flow_classify_parse.c24
-rw-r--r--lib/librte_gso/Makefile1
-rw-r--r--lib/librte_gso/gso_common.h3
-rw-r--r--lib/librte_gso/gso_udp4.c81
-rw-r--r--lib/librte_gso/gso_udp4.h42
-rw-r--r--lib/librte_gso/meson.build2
-rw-r--r--lib/librte_gso/rte_gso.c24
-rw-r--r--lib/librte_gso/rte_gso.h6
-rw-r--r--lib/librte_hash/meson.build1
-rw-r--r--lib/librte_hash/rte_cuckoo_hash.c700
-rw-r--r--lib/librte_hash/rte_cuckoo_hash.h22
-rw-r--r--lib/librte_hash/rte_cuckoo_hash_x86.h164
-rw-r--r--lib/librte_hash/rte_hash.h88
-rw-r--r--lib/librte_hash/rte_hash_crc.h11
-rw-r--r--lib/librte_hash/rte_hash_version.map8
-rw-r--r--lib/librte_ip_frag/ip_frag_internal.c8
-rw-r--r--lib/librte_ip_frag/rte_ipv4_reassembly.c2
-rw-r--r--lib/librte_ip_frag/rte_ipv6_reassembly.c2
-rw-r--r--lib/librte_kni/meson.build2
-rw-r--r--lib/librte_kni/rte_kni.c7
-rw-r--r--lib/librte_kvargs/Makefile34
-rw-r--r--lib/librte_kvargs/meson.build5
-rw-r--r--lib/librte_kvargs/rte_kvargs.c61
-rw-r--r--lib/librte_kvargs/rte_kvargs.h58
-rw-r--r--lib/librte_kvargs/rte_kvargs_version.map8
-rw-r--r--lib/librte_latencystats/rte_latencystats.c16
-rw-r--r--lib/librte_mbuf/Makefile3
-rw-r--r--lib/librte_mbuf/meson.build1
-rw-r--r--lib/librte_mbuf/rte_mbuf.c26
-rw-r--r--lib/librte_mbuf/rte_mbuf.h540
-rw-r--r--lib/librte_mbuf/rte_mbuf_pool_ops.c14
-rw-r--r--lib/librte_mbuf/rte_mbuf_pool_ops.h13
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.c3
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.h53
-rw-r--r--lib/librte_mbuf/rte_mbuf_version.map4
-rw-r--r--lib/librte_member/rte_member.c5
-rw-r--r--lib/librte_mempool/Makefile7
-rw-r--r--lib/librte_mempool/meson.build18
-rw-r--r--lib/librte_mempool/rte_mempool.c546
-rw-r--r--lib/librte_mempool/rte_mempool.h588
-rw-r--r--lib/librte_mempool/rte_mempool_ops.c51
-rw-r--r--lib/librte_mempool/rte_mempool_ops_default.c70
-rw-r--r--lib/librte_mempool/rte_mempool_version.map23
-rw-r--r--lib/librte_meter/Makefile2
-rw-r--r--lib/librte_meter/meson.build1
-rw-r--r--lib/librte_meter/rte_meter.c93
-rw-r--r--lib/librte_meter/rte_meter.h197
-rw-r--r--lib/librte_meter/rte_meter_version.map7
-rw-r--r--lib/librte_metrics/rte_metrics.c23
-rw-r--r--lib/librte_net/Makefile1
-rw-r--r--lib/librte_net/meson.build1
-rw-r--r--lib/librte_net/rte_esp.h2
-rw-r--r--lib/librte_net/rte_ether.h31
-rw-r--r--lib/librte_net/rte_ip.h42
-rw-r--r--lib/librte_net/rte_net.c21
-rw-r--r--lib/librte_net/rte_net.h27
-rw-r--r--lib/librte_net/rte_net_version.map3
-rw-r--r--lib/librte_pci/Makefile32
-rw-r--r--lib/librte_pci/rte_pci.c11
-rw-r--r--lib/librte_pci/rte_pci_version.map5
-rw-r--r--lib/librte_pdump/Makefile3
-rw-r--r--lib/librte_pdump/meson.build2
-rw-r--r--lib/librte_pdump/rte_pdump.c428
-rw-r--r--lib/librte_pdump/rte_pdump.h9
-rw-r--r--lib/librte_pipeline/Makefile7
-rw-r--r--lib/librte_pipeline/meson.build7
-rw-r--r--lib/librte_pipeline/rte_pipeline_version.map28
-rw-r--r--lib/librte_pipeline/rte_port_in_action.c531
-rw-r--r--lib/librte_pipeline/rte_port_in_action.h301
-rw-r--r--lib/librte_pipeline/rte_table_action.c2386
-rw-r--r--lib/librte_pipeline/rte_table_action.h905
-rw-r--r--lib/librte_port/meson.build12
-rw-r--r--lib/librte_power/channel_commands.h3
-rw-r--r--lib/librte_power/power_acpi_cpufreq.c21
-rw-r--r--lib/librte_power/power_acpi_cpufreq.h16
-rw-r--r--lib/librte_power/power_kvm_vm.c8
-rw-r--r--lib/librte_power/power_kvm_vm.h17
-rw-r--r--lib/librte_power/rte_power.c3
-rw-r--r--lib/librte_power/rte_power.h32
-rw-r--r--lib/librte_power/rte_power_version.map9
-rw-r--r--lib/librte_rawdev/Makefile1
-rw-r--r--lib/librte_rawdev/meson.build5
-rw-r--r--lib/librte_rawdev/rte_rawdev.c76
-rw-r--r--lib/librte_rawdev/rte_rawdev.h55
-rw-r--r--lib/librte_rawdev/rte_rawdev_pmd.h28
-rw-r--r--lib/librte_rawdev/rte_rawdev_version.map3
-rw-r--r--lib/librte_ring/Makefile2
-rw-r--r--lib/librte_ring/rte_ring.h27
-rw-r--r--lib/librte_ring/rte_ring_c11_mem.h10
-rw-r--r--lib/librte_ring/rte_ring_generic.h10
-rw-r--r--lib/librte_sched/rte_sched.c239
-rw-r--r--lib/librte_sched/rte_sched.h21
-rw-r--r--lib/librte_sched/rte_sched_version.map6
-rw-r--r--lib/librte_security/rte_security.c37
-rw-r--r--lib/librte_security/rte_security.h50
-rw-r--r--lib/librte_security/rte_security_driver.h40
-rw-r--r--lib/librte_table/Makefile1
-rw-r--r--lib/librte_table/meson.build37
-rw-r--r--lib/librte_table/rte_table_acl.c6
-rw-r--r--lib/librte_table/rte_table_hash.h3
-rw-r--r--lib/librte_table/rte_table_hash_cuckoo.c11
-rw-r--r--lib/librte_table/rte_table_hash_cuckoo.h57
-rw-r--r--lib/librte_timer/rte_timer.c2
-rw-r--r--lib/librte_vhost/Makefile13
-rw-r--r--lib/librte_vhost/fd_man.c98
-rw-r--r--lib/librte_vhost/fd_man.h17
-rw-r--r--lib/librte_vhost/iotlb.c10
-rw-r--r--lib/librte_vhost/iotlb.h2
-rw-r--r--lib/librte_vhost/meson.build11
-rw-r--r--lib/librte_vhost/rte_vdpa.h87
-rw-r--r--lib/librte_vhost/rte_vhost.h212
-rw-r--r--lib/librte_vhost/rte_vhost_crypto.h109
-rw-r--r--lib/librte_vhost/rte_vhost_version.map24
-rw-r--r--lib/librte_vhost/socket.c288
-rw-r--r--lib/librte_vhost/vdpa.c115
-rw-r--r--lib/librte_vhost/vhost.c312
-rw-r--r--lib/librte_vhost/vhost.h358
-rw-r--r--lib/librte_vhost/vhost_crypto.c1372
-rw-r--r--lib/librte_vhost/vhost_user.c581
-rw-r--r--lib/librte_vhost/vhost_user.h56
-rw-r--r--lib/librte_vhost/virtio_crypto.h422
-rw-r--r--lib/librte_vhost/virtio_net.c1460
-rw-r--r--lib/meson.build30
398 files changed, 45923 insertions, 75106 deletions
diff --git a/lib/Makefile b/lib/Makefile
index ec965a60..afa604e2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,7 +4,10 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += librte_compat
+DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
+DEPDIRS-librte_kvargs := librte_compat
DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
+DEPDIRS-librte_eal := librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci
DEPDIRS-librte_pci := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
@@ -18,9 +21,10 @@ DEPDIRS-librte_timer := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
DEPDIRS-librte_cmdline := librte_eal
-DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
-DEPDIRS-librte_ether := librte_net librte_eal librte_mempool librte_ring
-DEPDIRS-librte_ether += librte_mbuf
+DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ethdev
+DEPDIRS-librte_ethdev := librte_net librte_eal librte_mempool librte_ring
+DEPDIRS-librte_ethdev += librte_mbuf
+DEPDIRS-librte_ethdev += librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += librte_bbdev
DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
@@ -28,14 +32,18 @@ DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf
DEPDIRS-librte_cryptodev += librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_SECURITY) += librte_security
DEPDIRS-librte_security := librte_eal librte_mempool librte_ring librte_mbuf
-DEPDIRS-librte_security += librte_ether
+DEPDIRS-librte_security += librte_ethdev
DEPDIRS-librte_security += librte_cryptodev
+DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += librte_compressdev
+DEPDIRS-librte_compressdev := librte_eal librte_mempool librte_ring librte_mbuf
+DEPDIRS-librte_compressdev += librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
-DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
+DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ethdev librte_hash \
+ librte_mempool librte_timer librte_cryptodev
DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += librte_rawdev
-DEPDIRS-librte_rawdev := librte_eal librte_ether
+DEPDIRS-librte_rawdev := librte_eal librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
-DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether \
+DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
librte_net
DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
DEPDIRS-librte_hash := librte_eal librte_ring
@@ -50,18 +58,18 @@ DEPDIRS-librte_member := librte_eal librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
DEPDIRS-librte_net := librte_mbuf librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
-DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ethdev
DEPDIRS-librte_ip_frag += librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro
-DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net
+DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ethdev librte_net
DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats
DEPDIRS-librte_jobstats := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics
DEPDIRS-librte_metrics := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_BITRATE) += librte_bitratestats
-DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ether
+DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += librte_latencystats
-DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ether librte_mbuf
+DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ethdev librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
DEPDIRS-librte_power := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
@@ -71,12 +79,10 @@ DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net
DEPDIRS-librte_sched += librte_timer
-DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
-DEPDIRS-librte_kvargs := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
-DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ether
+DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
-DEPDIRS-librte_port := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_port := librte_eal librte_mempool librte_mbuf librte_ethdev
DEPDIRS-librte_port += librte_ip_frag librte_sched
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
DEPDIRS-librte_port += librte_kni
@@ -93,15 +99,17 @@ DEPDIRS-librte_pipeline += librte_table librte_port
DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
-DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
-DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net
DEPDIRS-librte_gso += librte_mempool
+DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
+DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
endif
-DEPDIRS-librte_kni := librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_kni := librte_eal librte_mempool librte_mbuf librte_ethdev
DEPDIRS-librte_kni += librte_pci
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_bbdev/rte_bbdev.c b/lib/librte_bbdev/rte_bbdev.c
index 74ecc490..c4cc18d9 100644
--- a/lib/librte_bbdev/rte_bbdev.c
+++ b/lib/librte_bbdev/rte_bbdev.c
@@ -495,11 +495,20 @@ rte_bbdev_queue_configure(uint16_t dev_id, uint16_t queue_id,
conf->queue_size, queue_id, dev_id);
return -EINVAL;
}
- if (conf->priority > dev_info.max_queue_priority) {
+ if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC &&
+ conf->priority > dev_info.max_ul_queue_priority) {
rte_bbdev_log(ERR,
"Priority (%u) of queue %u of bdev %u must be <= %u",
conf->priority, queue_id, dev_id,
- dev_info.max_queue_priority);
+ dev_info.max_ul_queue_priority);
+ return -EINVAL;
+ }
+ if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC &&
+ conf->priority > dev_info.max_dl_queue_priority) {
+ rte_bbdev_log(ERR,
+ "Priority (%u) of queue %u of bdev %u must be <= %u",
+ conf->priority, queue_id, dev_id,
+ dev_info.max_dl_queue_priority);
return -EINVAL;
}
}
@@ -1116,9 +1125,7 @@ rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type)
return NULL;
}
-RTE_INIT(rte_bbdev_init_log);
-static void
-rte_bbdev_init_log(void)
+RTE_INIT(rte_bbdev_init_log)
{
bbdev_logtype = rte_log_register("lib.bbdev");
if (bbdev_logtype >= 0)
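[Note] The hunk above moves rte_bbdev.c to the 18.08 RTE_INIT() form, where the macro itself declares the constructor and the function body follows directly, instead of the older pattern of forward-declaring a static init function. A minimal sketch of the new style, modeled on the code above (the component and log names are illustrative):

#include <rte_common.h>
#include <rte_log.h>

static int my_logtype;

/* constructor-style registration: runs before main(), no separate prototype */
RTE_INIT(my_component_init_log)
{
	my_logtype = rte_log_register("lib.mycomp");
	if (my_logtype >= 0)
		rte_log_set_level(my_logtype, RTE_LOG_INFO);
}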
diff --git a/lib/librte_bbdev/rte_bbdev.h b/lib/librte_bbdev/rte_bbdev.h
index 5e7e4954..25ef409f 100644
--- a/lib/librte_bbdev/rte_bbdev.h
+++ b/lib/librte_bbdev/rte_bbdev.h
@@ -239,6 +239,8 @@ struct rte_bbdev_stats {
uint64_t enqueue_err_count;
/** Total error count on operations dequeued */
uint64_t dequeue_err_count;
+ /** Offload time */
+ uint64_t offload_time;
};
/**
@@ -279,8 +281,10 @@ struct rte_bbdev_driver_info {
uint32_t queue_size_lim;
/** Set if device off-loads operation to hardware */
bool hardware_accelerated;
- /** Max value supported by queue priority */
- uint8_t max_queue_priority;
+ /** Max value supported by queue priority for DL */
+ uint8_t max_dl_queue_priority;
+ /** Max value supported by queue priority for UL */
+ uint8_t max_ul_queue_priority;
/** Set if device supports per-queue interrupts */
bool queue_intr_supported;
/** Minimum alignment of buffers, in bytes */
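[Note] With the driver info split into separate uplink/downlink priority limits, an application has to pick the bound matching the queue's operation type before calling rte_bbdev_queue_configure(). A hedged sketch using the fields added above (error handling elided; the info.drv access path follows the 18.08 bbdev API):

#include <rte_bbdev.h>

/* sketch: clamp the requested priority to the per-direction maximum */
static int
setup_queue(uint16_t dev_id, uint16_t queue_id,
	struct rte_bbdev_queue_conf *conf)
{
	struct rte_bbdev_info info;

	rte_bbdev_info_get(dev_id, &info);

	if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC &&
			conf->priority > info.drv.max_ul_queue_priority)
		conf->priority = info.drv.max_ul_queue_priority;
	else if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC &&
			conf->priority > info.drv.max_dl_queue_priority)
		conf->priority = info.drv.max_dl_queue_priority;

	return rte_bbdev_queue_configure(dev_id, queue_id, conf);
}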
diff --git a/lib/librte_bbdev/rte_bbdev_op.h b/lib/librte_bbdev/rte_bbdev_op.h
index 9a80c64a..83f62c2d 100644
--- a/lib/librte_bbdev/rte_bbdev_op.h
+++ b/lib/librte_bbdev/rte_bbdev_op.h
@@ -25,7 +25,23 @@ extern "C" {
#include <rte_memory.h>
#include <rte_mempool.h>
-#define RTE_BBDEV_MAX_CODE_BLOCKS 64
+/* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
+#define RTE_BBDEV_C_SUBBLOCK (32)
+/* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
+#define RTE_BBDEV_MAX_TB_SIZE (391656)
+/* Maximum size of Code Block (36.212, Table 5.1.3-3) */
+#define RTE_BBDEV_MAX_CB_SIZE (6144)
+/* Minimum size of Code Block (36.212, Table 5.1.3-3) */
+#define RTE_BBDEV_MIN_CB_SIZE (40)
+/* Maximum size of circular buffer */
+#define RTE_BBDEV_MAX_KW (18528)
+/*
+ * Maximum number of Code Blocks in Transport Block. It is calculated based on
+ * maximum size of one Code Block and one Transport Block (considering CRC24A
+ * and CRC24B):
+ * (391656 + 24) / (6144 - 24) = 64
+ */
+#define RTE_BBDEV_MAX_CODE_BLOCKS (64)
/** Flags for turbo decoder operation and capability structure */
enum rte_bbdev_op_td_flag_bitmasks {
@@ -86,7 +102,11 @@ enum rte_bbdev_op_td_flag_bitmasks {
*/
RTE_BBDEV_TURBO_MAP_DEC = (1ULL << 14),
/**< Set if a device supports scatter-gather functionality */
- RTE_BBDEV_TURBO_DEC_SCATTER_GATHER = (1ULL << 15)
+ RTE_BBDEV_TURBO_DEC_SCATTER_GATHER = (1ULL << 15),
+ /**< Set to keep CRC24B bits appended while decoding. Only usable when
+ * decoding Transport Blocks (code_block_mode = 0).
+ */
+ RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP = (1ULL << 16)
};
/** Flags for turbo encoder operation and capability structure */
@@ -363,6 +383,10 @@ struct rte_bbdev_op_turbo_enc {
struct rte_bbdev_op_cap_turbo_dec {
/**< Flags from rte_bbdev_op_td_flag_bitmasks */
uint32_t capability_flags;
+ /** Maximal LLR absolute value. Acceptable LLR values lie in range
+ * [-max_llr_modulus, max_llr_modulus].
+ */
+ int8_t max_llr_modulus;
uint8_t num_buffers_src; /**< Num input code block buffers */
/**< Num hard output code block buffers */
uint8_t num_buffers_hard_out;
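[Note] The derivation quoted in the new RTE_BBDEV_MAX_CODE_BLOCKS comment checks out: (391656 + 24) / (6144 - 24) = 391680 / 6120 = 64. A small compile-time restatement of that relation (illustrative only):

#include <rte_common.h>
#include <rte_bbdev_op.h>

/* sketch: the CB count limit follows from the TB/CB size limits plus CRC24A/B */
static inline void
check_max_code_blocks(void)
{
	RTE_BUILD_BUG_ON((RTE_BBDEV_MAX_TB_SIZE + 24) /
		(RTE_BBDEV_MAX_CB_SIZE - 24) != RTE_BBDEV_MAX_CODE_BLOCKS);
}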
diff --git a/lib/librte_bitratestats/meson.build b/lib/librte_bitratestats/meson.build
index ede7e0a5..c35b62b3 100644
--- a/lib/librte_bitratestats/meson.build
+++ b/lib/librte_bitratestats/meson.build
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+version = 2
sources = files('rte_bitrate.c')
headers = files('rte_bitrate.h')
deps += ['ethdev', 'metrics']
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index 964e3c39..c4b28f62 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -47,6 +47,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
};
int return_value;
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names));
if (return_value >= 0)
bitrate_data->id_stats_set = return_value;
@@ -65,6 +68,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
const int64_t alpha_percent = 20;
uint64_t values[6];
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
ret_code = rte_eth_stats_get(port_id, &eth_stats);
if (ret_code != 0)
return ret_code;
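[Note] With the NULL checks added above, rte_stats_bitrate_reg() and rte_stats_bitrate_calc() now fail with -EINVAL on a NULL handle instead of dereferencing it. A minimal usage sketch of the bitrate-stats flow, assuming the metrics library has its backing store set up via rte_metrics_init() (error handling abbreviated):

#include <rte_lcore.h>
#include <rte_metrics.h>
#include <rte_bitrate.h>

/* sketch: register bitrate metrics once, then sample a port periodically */
static void
bitrate_setup_and_sample(uint16_t port_id)
{
	struct rte_stats_bitrates *br;

	rte_metrics_init(rte_socket_id());
	br = rte_stats_bitrate_create();	/* NULL on allocation failure */
	if (br == NULL || rte_stats_bitrate_reg(br) < 0)
		return;

	/* typically called from a timer or stats thread, e.g. once per second */
	rte_stats_bitrate_calc(br, port_id);
}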
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
new file mode 100644
index 00000000..c0e8aaa6
--- /dev/null
+++ b/lib/librte_bpf/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_bpf.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_net -lrte_eal
+LDLIBS += -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf -lrte_ethdev
+ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
+LDLIBS += -lelf
+endif
+
+EXPORT_MAP := rte_bpf_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load_elf.c
+endif
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
+endif
+
+# install header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
new file mode 100644
index 00000000..f590c8c3
--- /dev/null
+++ b/lib/librte_bpf/bpf.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+int rte_bpf_logtype;
+
+__rte_experimental void
+rte_bpf_destroy(struct rte_bpf *bpf)
+{
+ if (bpf != NULL) {
+ if (bpf->jit.func != NULL)
+ munmap(bpf->jit.func, bpf->jit.sz);
+ munmap(bpf, bpf->sz);
+ }
+}
+
+__rte_experimental int
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit)
+{
+ if (bpf == NULL || jit == NULL)
+ return -EINVAL;
+
+ jit[0] = bpf->jit;
+ return 0;
+}
+
+int
+bpf_jit(struct rte_bpf *bpf)
+{
+ int32_t rc;
+
+#ifdef RTE_ARCH_X86_64
+ rc = bpf_jit_x86(bpf);
+#else
+ rc = -ENOTSUP;
+#endif
+
+ if (rc != 0)
+ RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n",
+ __func__, bpf, rc);
+ return rc;
+}
+
+RTE_INIT(rte_bpf_init_log)
+{
+ rte_bpf_logtype = rte_log_register("lib.bpf");
+ if (rte_bpf_logtype >= 0)
+ rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO);
+}
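[Note] A hedged sketch of how the functions above fit together: query the JIT entry point after loading a program, fall back to the interpreter when no JIT was produced, and release the object with rte_bpf_destroy(). The rte_bpf_load() prototype and the rte_bpf_jit/rte_bpf_prm layouts are assumed from rte_bpf.h, which this commit adds but which is not shown here:

#include <rte_bpf.h>

/* sketch: prefer the JIT-ed entry point when one was produced */
static uint64_t
run_filter(const struct rte_bpf_prm *prm, void *ctx)
{
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	uint64_t rc;

	bpf = rte_bpf_load(prm);		/* assumed prototype */
	if (bpf == NULL)
		return 0;

	if (rte_bpf_get_jit(bpf, &jit) == 0 && jit.func != NULL)
		rc = jit.func(ctx);		/* native code path */
	else
		rc = rte_bpf_exec(bpf, ctx);	/* interpreter path */

	rte_bpf_destroy(bpf);
	return rc;
}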
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
new file mode 100644
index 00000000..c10f3aec
--- /dev/null
+++ b/lib/librte_bpf/bpf_def.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California.
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _RTE_BPF_DEF_H_
+#define _RTE_BPF_DEF_H_
+
+/**
+ * @file
+ *
+ * classic BPF (cBPF) and extended BPF (eBPF) related defines.
+ * For more information regarding cBPF and eBPF ISA and their differences,
+ * please refer to:
+ * https://www.kernel.org/doc/Documentation/networking/filter.txt.
+ * As a rule of thumb for that file:
+ * all definitions used by both cBPF and eBPF start with bpf(BPF)_ prefix,
+ * while eBPF only ones start with ebpf(EBPF)) prefix.
+ */
+
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The instruction encodings.
+ */
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+#define EBPF_ALU64 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define EBPF_DW 0x18
+
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+#define EBPF_XADD 0xc0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define EBPF_MOV 0xb0
+#define EBPF_ARSH 0xc0
+#define EBPF_END 0xd0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+
+#define EBPF_JNE 0x50
+#define EBPF_JSGT 0x60
+#define EBPF_JSGE 0x70
+#define EBPF_CALL 0x80
+#define EBPF_EXIT 0x90
+#define EBPF_JLT 0xa0
+#define EBPF_JLE 0xb0
+#define EBPF_JSLT 0xc0
+#define EBPF_JSLE 0xd0
+
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* if BPF_OP(code) == EBPF_END */
+#define EBPF_TO_LE 0x00 /* convert to little-endian */
+#define EBPF_TO_BE 0x08 /* convert to big-endian */
+
+/*
+ * eBPF registers
+ */
+enum {
+ EBPF_REG_0, /* return value from internal function/for eBPF program */
+ EBPF_REG_1, /* 0-th argument to internal function */
+ EBPF_REG_2, /* 1-th argument to internal function */
+ EBPF_REG_3, /* 2-th argument to internal function */
+ EBPF_REG_4, /* 3-th argument to internal function */
+ EBPF_REG_5, /* 4-th argument to internal function */
+ EBPF_REG_6, /* callee saved register */
+ EBPF_REG_7, /* callee saved register */
+ EBPF_REG_8, /* callee saved register */
+ EBPF_REG_9, /* callee saved register */
+ EBPF_REG_10, /* stack pointer (read-only) */
+ EBPF_REG_NUM,
+};
+
+/*
+ * eBPF instruction format
+ */
+struct ebpf_insn {
+ uint8_t code;
+ uint8_t dst_reg:4;
+ uint8_t src_reg:4;
+ int16_t off;
+ int32_t imm;
+};
+
+/*
+ * eBPF allows functions with R1-R5 as arguments.
+ */
+#define EBPF_FUNC_MAX_ARGS (EBPF_REG_6 - EBPF_REG_1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_BPF_DEF_H_ */
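[Note] The encodings above combine the same way as in classic/extended BPF: class | op | source for ALU and JMP instructions, class | mode | size for loads and stores. A minimal hand-assembled program built from these defines (illustrative; it simply returns its first argument):

#include <rte_bpf.h>	/* pulls in bpf_def.h */

/* sketch: mov r0, r1 ; exit */
static const struct ebpf_insn prog[] = {
	{
		.code = (EBPF_ALU64 | EBPF_MOV | BPF_X),
		.dst_reg = EBPF_REG_0,
		.src_reg = EBPF_REG_1,
	},
	{
		.code = (BPF_JMP | EBPF_EXIT),
	},
};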
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
new file mode 100644
index 00000000..6a79139c
--- /dev/null
+++ b/lib/librte_bpf/bpf_exec.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define BPF_JMP_UNC(ins) ((ins) += (ins)->off)
+
+#define BPF_JMP_CND_REG(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \
+ (ins)->off : 0)
+
+#define BPF_JMP_CND_IMM(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? \
+ (ins)->off : 0)
+
+#define BPF_NEG_ALU(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg]))
+
+#define EBPF_MOV_ALU_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg])
+
+#define BPF_OP_ALU_REG(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg])
+
+#define EBPF_MOV_ALU_IMM(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(ins)->imm)
+
+#define BPF_OP_ALU_IMM(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
+
+#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
+ if ((type)(reg)[(ins)->src_reg] == 0) { \
+ RTE_BPF_LOG(ERR, \
+ "%s(%p): division by 0 at pc: %#zx;\n", \
+ __func__, bpf, \
+ (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \
+ return 0; \
+ } \
+} while (0)
+
+#define BPF_LD_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = \
+ *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off))
+
+#define BPF_ST_IMM(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(ins)->imm)
+
+#define BPF_ST_REG(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(reg)[(ins)->src_reg])
+
+#define BPF_ST_XADD_REG(reg, ins, tp) \
+ (rte_atomic##tp##_add((rte_atomic##tp##_t *) \
+ (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \
+ reg[ins->src_reg]))
+
+static inline void
+bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_be_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_be_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_be_64(*v);
+ break;
+ }
+}
+
+static inline void
+bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_le_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_le_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_le_64(*v);
+ break;
+ }
+}
+
+static inline uint64_t
+bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
+{
+ const struct ebpf_insn *ins;
+
+ for (ins = bpf->prm.ins; ; ins++) {
+ switch (ins->code) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_K):
+ EBPF_MOV_ALU_IMM(reg, ins, uint32_t);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_X):
+ EBPF_MOV_ALU_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+ bpf_alu_be(reg, ins);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+ bpf_alu_le(reg, ins);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
+ break;
+ case (EBPF_ALU64 | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+ EBPF_MOV_ALU_IMM(reg, ins, uint64_t);
+ break;
+ /* 64 bit ALU REG operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, int64_t);
+ break;
+ case (EBPF_ALU64 | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+ EBPF_MOV_ALU_REG(reg, ins, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint64_t);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ BPF_LD_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ BPF_LD_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ BPF_LD_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_LDX | BPF_MEM | EBPF_DW):
+ BPF_LD_REG(reg, ins, uint64_t);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ reg[ins->dst_reg] = (uint32_t)ins[0].imm |
+ (uint64_t)(uint32_t)ins[1].imm << 32;
+ ins++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ BPF_ST_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_H):
+ BPF_ST_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_W):
+ BPF_ST_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_STX | BPF_MEM | EBPF_DW):
+ BPF_ST_REG(reg, ins, uint64_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ BPF_ST_IMM(reg, ins, uint8_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_H):
+ BPF_ST_IMM(reg, ins, uint16_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_W):
+ BPF_ST_IMM(reg, ins, uint32_t);
+ break;
+ case (BPF_ST | BPF_MEM | EBPF_DW):
+ BPF_ST_IMM(reg, ins, uint64_t);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | EBPF_XADD | BPF_W):
+ BPF_ST_XADD_REG(reg, ins, 32);
+ break;
+ case (BPF_STX | EBPF_XADD | EBPF_DW):
+ BPF_ST_XADD_REG(reg, ins, 64);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ BPF_JMP_UNC(ins);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, &, uint64_t);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, &, uint64_t);
+ break;
+ /* call instructions */
+ case (BPF_JMP | EBPF_CALL):
+ reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func.val(
+ reg[EBPF_REG_1], reg[EBPF_REG_2],
+ reg[EBPF_REG_3], reg[EBPF_REG_4],
+ reg[EBPF_REG_5]);
+ break;
+ /* return instruction */
+ case (BPF_JMP | EBPF_EXIT):
+ return reg[EBPF_REG_0];
+ default:
+ RTE_BPF_LOG(ERR,
+ "%s(%p): invalid opcode %#x at pc: %#zx;\n",
+ __func__, bpf, ins->code,
+ (uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+ return 0;
+ }
+ }
+
+ /* should never be reached */
+ RTE_VERIFY(0);
+ return 0;
+}
+
+__rte_experimental uint32_t
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+ uint32_t num)
+{
+ uint32_t i;
+ uint64_t reg[EBPF_REG_NUM];
+ uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+
+ for (i = 0; i != num; i++) {
+
+ reg[EBPF_REG_1] = (uintptr_t)ctx[i];
+ reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack));
+
+ rc[i] = bpf_exec(bpf, reg);
+ }
+
+ return i;
+}
+
+__rte_experimental uint64_t
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx)
+{
+ uint64_t rc;
+
+ rte_bpf_exec_burst(bpf, &ctx, &rc, 1);
+ return rc;
+}
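[Note] A short usage sketch for the interpreter entry points defined above; as the code shows, rte_bpf_exec() is just the single-element case of rte_bpf_exec_burst(). The rte_bpf handle is assumed to come from one of the load functions in rte_bpf.h, and what each ctx[] element should point at (raw packet data vs. an mbuf) depends on how the program was loaded:

#include <rte_mbuf.h>
#include <rte_bpf.h>

/* sketch: run one eBPF program over a burst of packets, collecting R0 values */
static void
classify_burst(const struct rte_bpf *bpf, struct rte_mbuf *pkts[],
	uint64_t verdict[], uint32_t nb_pkts)
{
	uint32_t i;
	void *ctx[nb_pkts];

	for (i = 0; i != nb_pkts; i++)
		ctx[i] = rte_pktmbuf_mtod(pkts[i], void *);

	/* verdict[i] receives the program's R0 for packet i */
	rte_bpf_exec_burst(bpf, ctx, verdict, nb_pkts);
}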
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
new file mode 100644
index 00000000..b577e2cb
--- /dev/null
+++ b/lib/librte_bpf/bpf_impl.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _BPF_H_
+#define _BPF_H_
+
+#include <rte_bpf.h>
+#include <sys/mman.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BPF_STACK_SIZE 0x200
+
+struct rte_bpf {
+ struct rte_bpf_prm prm;
+ struct rte_bpf_jit jit;
+ size_t sz;
+ uint32_t stack_sz;
+};
+
+extern int bpf_validate(struct rte_bpf *bpf);
+
+extern int bpf_jit(struct rte_bpf *bpf);
+
+#ifdef RTE_ARCH_X86_64
+extern int bpf_jit_x86(struct rte_bpf *);
+#endif
+
+extern int rte_bpf_logtype;
+
+#define RTE_BPF_LOG(lvl, fmt, args...) \
+ rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+
+static inline size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+ if (bpf_op_sz == BPF_B)
+ return sizeof(uint8_t);
+ else if (bpf_op_sz == BPF_H)
+ return sizeof(uint16_t);
+ else if (bpf_op_sz == BPF_W)
+ return sizeof(uint32_t);
+ else if (bpf_op_sz == EBPF_DW)
+ return sizeof(uint64_t);
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_H_ */
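[Note] bpf_size() above maps the size field of an opcode to a byte width; internal callers (validator, JIT) combine it with BPF_SIZE() roughly like this (sketch of internal usage, not public API):

/* sketch: access width of a load/store opcode */
uint8_t op = BPF_LDX | BPF_MEM | BPF_H;
size_t width = bpf_size(BPF_SIZE(op));	/* == sizeof(uint16_t) */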
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
new file mode 100644
index 00000000..68ea389f
--- /dev/null
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -0,0 +1,1356 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define GET_BPF_OP(op) (BPF_OP(op) >> 4)
+
+enum {
+ RAX = 0, /* scratch, return value */
+ RCX = 1, /* scratch, 4th arg */
+ RDX = 2, /* scratch, 3rd arg */
+ RBX = 3, /* callee saved */
+ RSP = 4, /* stack pointer */
+ RBP = 5, /* frame pointer, callee saved */
+ RSI = 6, /* scratch, 2nd arg */
+ RDI = 7, /* scratch, 1st arg */
+ R8 = 8, /* scratch, 5th arg */
+ R9 = 9, /* scratch, 6th arg */
+ R10 = 10, /* scratch */
+ R11 = 11, /* scratch */
+ R12 = 12, /* callee saved */
+ R13 = 13, /* callee saved */
+ R14 = 14, /* callee saved */
+ R15 = 15, /* callee saved */
+};
+
+#define IS_EXT_REG(r) ((r) >= R8)
+
+enum {
+ REX_PREFIX = 0x40, /* fixed value 0100 */
+ REX_W = 0x8, /* 64bit operand size */
+ REX_R = 0x4, /* extension of the ModRM.reg field */
+ REX_X = 0x2, /* extension of the SIB.index field */
+ REX_B = 0x1, /* extension of the ModRM.rm field */
+};
+
+enum {
+ MOD_INDIRECT = 0,
+ MOD_IDISP8 = 1,
+ MOD_IDISP32 = 2,
+ MOD_DIRECT = 3,
+};
+
+enum {
+ SIB_SCALE_1 = 0,
+ SIB_SCALE_2 = 1,
+ SIB_SCALE_4 = 2,
+ SIB_SCALE_8 = 3,
+};
+
+/*
+ * eBPF to x86_64 register mappings.
+ */
+static const uint32_t ebpf2x86[] = {
+ [EBPF_REG_0] = RAX,
+ [EBPF_REG_1] = RDI,
+ [EBPF_REG_2] = RSI,
+ [EBPF_REG_3] = RDX,
+ [EBPF_REG_4] = RCX,
+ [EBPF_REG_5] = R8,
+ [EBPF_REG_6] = RBX,
+ [EBPF_REG_7] = R13,
+ [EBPF_REG_8] = R14,
+ [EBPF_REG_9] = R15,
+ [EBPF_REG_10] = RBP,
+};
+
+/*
+ * r10 and r11 are used as a scratch temporary registers.
+ */
+enum {
+ REG_DIV_IMM = R9,
+ REG_TMP0 = R11,
+ REG_TMP1 = R10,
+};
+
+/*
+ * callee saved registers list.
+ * keep RBP as the last one.
+ */
+static const uint32_t save_regs[] = {RBX, R12, R13, R14, R15, RBP};
+
+struct bpf_jit_state {
+ uint32_t idx;
+ size_t sz;
+ struct {
+ uint32_t num;
+ int32_t off;
+ } exit;
+ uint32_t reguse;
+ int32_t *off;
+ uint8_t *ins;
+};
+
+#define INUSE(v, r) (((v) >> (r)) & 1)
+#define USED(v, r) ((v) |= 1 << (r))
+
+union bpf_jit_imm {
+ uint32_t u32;
+ uint8_t u8[4];
+};
+
+/*
+ * In many cases for imm8 we can produce shorter code.
+ */
+static size_t
+imm_size(int32_t v)
+{
+ if (v == (int8_t)v)
+ return sizeof(int8_t);
+ return sizeof(int32_t);
+}
+
+static void
+emit_bytes(struct bpf_jit_state *st, const uint8_t ins[], uint32_t sz)
+{
+ uint32_t i;
+
+ if (st->ins != NULL) {
+ for (i = 0; i != sz; i++)
+ st->ins[st->sz + i] = ins[i];
+ }
+ st->sz += sz;
+}
+
+static void
+emit_imm(struct bpf_jit_state *st, const uint32_t imm, uint32_t sz)
+{
+ union bpf_jit_imm v;
+
+ v.u32 = imm;
+ emit_bytes(st, v.u8, sz);
+}
+
+/*
+ * emit REX byte
+ */
+static void
+emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm)
+{
+ uint8_t rex;
+
+ /* mark operand registers as used*/
+ USED(st->reguse, reg);
+ USED(st->reguse, rm);
+
+ rex = 0;
+ if (BPF_CLASS(op) == EBPF_ALU64 ||
+ op == (BPF_ST | BPF_MEM | EBPF_DW) ||
+ op == (BPF_STX | BPF_MEM | EBPF_DW) ||
+ op == (BPF_STX | EBPF_XADD | EBPF_DW) ||
+ op == (BPF_LD | BPF_IMM | EBPF_DW) ||
+ (BPF_CLASS(op) == BPF_LDX &&
+ BPF_MODE(op) == BPF_MEM &&
+ BPF_SIZE(op) != BPF_W))
+ rex |= REX_W;
+
+ if (IS_EXT_REG(reg))
+ rex |= REX_R;
+
+ if (IS_EXT_REG(rm))
+ rex |= REX_B;
+
+ /* store using SIL, DIL */
+ if (op == (BPF_STX | BPF_MEM | BPF_B) && (reg == RDI || reg == RSI))
+ rex |= REX_PREFIX;
+
+ if (rex != 0) {
+ rex |= REX_PREFIX;
+ emit_bytes(st, &rex, sizeof(rex));
+ }
+}
+
+/*
+ * emit MODRegRM byte
+ */
+static void
+emit_modregrm(struct bpf_jit_state *st, uint32_t mod, uint32_t reg, uint32_t rm)
+{
+ uint8_t v;
+
+ v = mod << 6 | (reg & 7) << 3 | (rm & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
+
+/*
+ * emit SIB byte
+ */
+static void
+emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base)
+{
+ uint8_t v;
+
+ v = scale << 6 | (idx & 7) << 3 | (base & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
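+
+/*
+ * For illustration, how the helpers above compose one instruction:
+ * a 64-bit 'add %rsi, %rdi' is emitted as
+ *   REX   = REX_PREFIX | REX_W               = 0x48
+ *   opcode                                   = 0x01
+ *   ModRM = MOD_DIRECT << 6 | RSI << 3 | RDI = 0xF7
+ * i.e. the byte sequence 48 01 F7. Extended registers (R8-R15) would also
+ * set REX_R/REX_B and keep only their low 3 bits in the ModRM byte.
+ */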
+
+/*
+ * emit xchg %<sreg>, %<dreg>
+ */
+static void
+emit_xchg_reg(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ const uint8_t ops = 0x87;
+
+ emit_rex(st, EBPF_ALU64, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit neg %<dreg>
+ */
+static void
+emit_neg(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 3;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+/*
+ * emit mov %<sreg>, %<dreg>
+ */
+static void
+emit_mov_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x89;
+
+ /* a 32-bit mov is emitted even when sreg == dreg, to clear the upper 32 bits */
+ if (sreg != dreg || BPF_CLASS(op) == BPF_ALU) {
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+ }
+}
+
+/*
+ * emit movzwl %<sreg>, %<dreg>
+ */
+static void
+emit_movzwl(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ static const uint8_t ops[] = {0x0F, 0xB7};
+
+ emit_rex(st, BPF_ALU, sreg, dreg);
+ emit_bytes(st, ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit ror <imm8>, %<dreg>
+ */
+static void
+emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t prfx = 0x66;
+ const uint8_t ops = 0xC1;
+ const uint8_t mods = 1;
+
+ emit_bytes(st, &prfx, sizeof(prfx));
+ emit_rex(st, BPF_ALU, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit bswap %<dreg>
+ */
+static void
+emit_be2le_48(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ uint32_t rop;
+
+ const uint8_t ops = 0x0F;
+ const uint8_t mods = 1;
+
+ rop = (imm == 64) ? EBPF_ALU64 : BPF_ALU;
+ emit_rex(st, rop, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+static void
+emit_be2le(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16) {
+ emit_ror_imm(st, dreg, 8);
+ emit_movzwl(st, dreg, dreg);
+ } else
+ emit_be2le_48(st, dreg, imm);
+}
+
+/*
+ * In general this is a NOP on x86 (the host is little-endian).
+ * Just clear the upper bits.
+ */
+static void
+emit_le2be(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16)
+ emit_movzwl(st, dreg, dreg);
+ else if (imm == 32)
+ emit_mov_reg(st, BPF_ALU | EBPF_MOV | BPF_X, dreg, dreg);
+}
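+
+/*
+ * For illustration: since x86 is little-endian, EBPF_TO_BE needs a real byte
+ * swap: for a 16-bit operand emit_be2le() produces 'ror $8' on the 16-bit
+ * register followed by 'movzwl' to clear bits 16-63, while 32/64-bit
+ * operands use a single bswap; EBPF_TO_LE only has to truncate, as done by
+ * emit_le2be() above.
+ */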
+
+/*
+ * emit one of:
+ * add <imm>, %<dreg>
+ * and <imm>, %<dreg>
+ * or <imm>, %<dreg>
+ * sub <imm>, %<dreg>
+ * xor <imm>, %<dreg>
+ */
+static void
+emit_alu_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t mod, opcode;
+ uint32_t bop, imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ static const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0,
+ [GET_BPF_OP(BPF_AND)] = 4,
+ [GET_BPF_OP(BPF_OR)] = 1,
+ [GET_BPF_OP(BPF_SUB)] = 5,
+ [GET_BPF_OP(BPF_XOR)] = 6,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+
+ imsz = imm_size(imm);
+ opcode = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &opcode, sizeof(opcode));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit one of:
+ * add %<sreg>, %<dreg>
+ * and %<sreg>, %<dreg>
+ * or %<sreg>, %<dreg>
+ * sub %<sreg>, %<dreg>
+ * xor %<sreg>, %<dreg>
+ */
+static void
+emit_alu_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0x01,
+ [GET_BPF_OP(BPF_AND)] = 0x21,
+ [GET_BPF_OP(BPF_OR)] = 0x09,
+ [GET_BPF_OP(BPF_SUB)] = 0x29,
+ [GET_BPF_OP(BPF_XOR)] = 0x31,
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_shift(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ uint8_t mod;
+ uint32_t bop, opx;
+
+ static const uint8_t ops[] = {0xC1, 0xD3};
+ static const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_LSH)] = 4,
+ [GET_BPF_OP(BPF_RSH)] = 5,
+ [GET_BPF_OP(EBPF_ARSH)] = 7,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+ opx = (BPF_SRC(op) == BPF_X);
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+}
+
+/*
+ * emit one of:
+ * shl <imm>, %<dreg>
+ * shr <imm>, %<dreg>
+ * sar <imm>, %<dreg>
+ */
+static void
+emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm)
+{
+ emit_shift(st, op, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit one of:
+ * shl %<dreg>
+ * shr %<dreg>
+ * sar %<dreg>
+ * note that rcx is implicitly used as the shift count register, so a few
+ * extra instructions for register spilling might be necessary.
+ */
+static void
+emit_shift_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+
+ emit_shift(st, op, (dreg == RCX) ? sreg : dreg);
+
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+}
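+
+/*
+ * For illustration: a shift like 'r6 <<= r7' (RBX by R13) cannot use R13
+ * directly, as x86 variable shifts take the count only in %cl. The emitter
+ * above therefore produces 'xchg %rcx, %r13; shl %cl, %rbx; xchg %rcx, %r13',
+ * swapping the count register in and out around the shift.
+ */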
+
+/*
+ * emit mov <imm>, %<dreg>
+ */
+static void
+emit_mov_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t ops = 0xC7;
+
+ if (imm == 0) {
+ /* replace 'mov 0, %<dst>' with 'xor %<dst>, %<dst>' */
+ op = BPF_CLASS(op) | BPF_XOR | BPF_X;
+ emit_alu_reg(st, op, dreg, dreg);
+ return;
+ }
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, 0, dreg);
+ emit_imm(st, imm, sizeof(imm));
+}
+
+/*
+ * emit mov <imm64>, %<dreg>
+ */
+static void
+emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0,
+ uint32_t imm1)
+{
+ /* MOV r64, imm64: the destination register is encoded in the opcode byte */
+ const uint8_t ops = 0xB8 | (dreg & 7);
+
+ if (imm1 == 0) {
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, dreg, imm0);
+ return;
+ }
+
+ emit_rex(st, EBPF_ALU64, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+
+ emit_imm(st, imm0, sizeof(imm0));
+ emit_imm(st, imm1, sizeof(imm1));
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some register spilling is necessary.
+ * emit:
+ * mov %rax, %r11
+ * mov %rdx, %r10
+ * mov %<dreg>, %rax
+ * either:
+ * mov %<sreg>, %rdx
+ * OR
+ * mov <imm>, %rdx
+ * mul %rdx
+ * mov %r10, %rdx
+ * mov %rax, %<dreg>
+ * mov %r11, %rax
+ */
+static void
+emit_mul(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 4;
+
+ /* save rax & rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* rax = dreg */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+
+ if (BPF_SRC(op) == BPF_X)
+ /* rdx = sreg */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+ sreg == RAX ? REG_TMP0 : sreg, RDX);
+ else
+ /* rdx = imm */
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, imm);
+
+ emit_rex(st, op, RAX, RDX);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RDX);
+
+ if (dreg != RDX)
+ /* restore rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+
+ if (dreg != RAX) {
+ /* dreg = rax */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+ /* restore rax */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+ }
+}
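+
+/*
+ * For illustration: 'r6 *= r7' (RBX *= R13) expands to
+ *   mov %rax, %r11 ; mov %rdx, %r10    spill the implicit mul registers
+ *   mov %rbx, %rax ; mov %r13, %rdx
+ *   mul %rdx                           rdx:rax = rax * rdx
+ *   mov %r10, %rdx                     restore rdx
+ *   mov %rax, %rbx                     result back into dreg
+ *   mov %r11, %rax                     restore rax
+ */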
+
+/*
+ * emit mov <ofs>(%<sreg>), %<dreg>
+ * note that for non-64-bit ops, the higher bits have to be cleared.
+ */
+static void
+emit_ld_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ uint32_t mods, opsz;
+ const uint8_t op32 = 0x8B;
+ const uint8_t op16[] = {0x0F, 0xB7};
+ const uint8_t op8[] = {0x0F, 0xB6};
+
+ emit_rex(st, op, dreg, sreg);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_B)
+ emit_bytes(st, op8, sizeof(op8));
+ else if (opsz == BPF_H)
+ emit_bytes(st, op16, sizeof(op16));
+ else
+ emit_bytes(st, &op32, sizeof(op32));
+
+ mods = (imm_size(ofs) == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, dreg, sreg);
+ if (sreg == RSP || sreg == R12)
+ emit_sib(st, SIB_SCALE_1, sreg, sreg);
+ emit_imm(st, ofs, imm_size(ofs));
+}
+
+/*
+ * emit one of:
+ * mov %<sreg>, <ofs>(%<dreg>)
+ * mov <imm>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_common(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, uint32_t imm, int32_t ofs)
+{
+ uint32_t mods, imsz, opsz, opx;
+ const uint8_t prfx16 = 0x66;
+
+ /* 8 bit instruction opcodes */
+ static const uint8_t op8[] = {0xC6, 0x88};
+
+ /* 16/32/64 bit instruction opcodes */
+ static const uint8_t ops[] = {0xC7, 0x89};
+
+ /* does the instruction use an immediate value or a src reg? */
+ opx = (BPF_CLASS(op) == BPF_STX);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_H)
+ emit_bytes(st, &prfx16, sizeof(prfx16));
+
+ emit_rex(st, op, sreg, dreg);
+
+ if (opsz == BPF_B)
+ emit_bytes(st, &op8[opx], sizeof(op8[opx]));
+ else
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, sreg, dreg);
+
+ if (dreg == RSP || dreg == R12)
+ emit_sib(st, SIB_SCALE_1, dreg, dreg);
+
+ emit_imm(st, ofs, imsz);
+
+ if (opx == 0) {
+ imsz = RTE_MIN(bpf_size(opsz), sizeof(imm));
+ emit_imm(st, imm, imsz);
+ }
+}
+
+static void
+emit_st_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm,
+ int32_t ofs)
+{
+ emit_st_common(st, op, 0, dreg, imm, ofs);
+}
+
+static void
+emit_st_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ emit_st_common(st, op, sreg, dreg, 0, ofs);
+}
+
+/*
+ * emit lock add %<sreg>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_xadd(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ uint32_t imsz, mods;
+
+ const uint8_t lck = 0xF0; /* lock prefix */
+ const uint8_t ops = 0x01; /* add opcode */
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_bytes(st, &lck, sizeof(lck));
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, mods, sreg, dreg);
+ emit_imm(st, ofs, imsz);
+}
+
+/*
+ * emit:
+ * mov <imm64>, %rax
+ * call *%rax
+ */
+static void
+emit_call(struct bpf_jit_state *st, uintptr_t trg)
+{
+ const uint8_t ops = 0xFF;
+ const uint8_t mods = 2;
+
+ emit_ld_imm64(st, RAX, trg, trg >> 32);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RAX);
+}
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the native code.
+ */
+static void
+emit_abs_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+ int32_t joff;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0xEB;
+ const uint8_t op32 = 0xE9;
+
+ const int32_t sz8 = sizeof(op8) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32) + sizeof(uint32_t);
+
+ /* max possible jmp instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
+ joff = ofs - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8, sizeof(op8));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, &op32, sizeof(op32));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
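+
+/*
+ * Note on the size selection above: the x86 displacement is relative to the
+ * end of the jump instruction, whose length is not known until an encoding
+ * is picked, so imm_size() is evaluated for both joff and joff + iszm before
+ * choosing the short or long form. For illustration, a backward jump with
+ * joff == -100 emits -102 after sz8 is subtracted, which still fits imm8,
+ * so the one-byte 0xEB form is used.
+ */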
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ */
+static void
+emit_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+ emit_abs_jmp(st, st->off[st->idx + ofs]);
+}
+
+/*
+ * emit one of:
+ * cmovz %<sreg>, %<dreg>
+ * cmovne %<sreg>, %<dreg>
+ * cmova %<sreg>, %<dreg>
+ * cmovb %<sreg>, %<dreg>
+ * cmovae %<sreg>, %<dreg>
+ * cmovbe %<sreg>, %<dreg>
+ * cmovg %<sreg>, %<dreg>
+ * cmovl %<sreg>, %<dreg>
+ * cmovge %<sreg>, %<dreg>
+ * cmovle %<sreg>, %<dreg>
+ */
+static void
+emit_movcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x44}, /* CMOVZ */
+ [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x45}, /* CMOVNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x47}, /* CMOVA */
+ [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x42}, /* CMOVB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x43}, /* CMOVAE */
+ [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x46}, /* CMOVBE */
+ [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x4F}, /* CMOVG */
+ [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x4C}, /* CMOVL */
+ [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x4D}, /* CMOVGE */
+ [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x4E}, /* CMOVLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x45}, /* CMOVNE */
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, dreg, sreg);
+ emit_bytes(st, ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, dreg, sreg);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the native code.
+ */
+static void
+emit_abs_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+ uint32_t bop, imsz;
+ int32_t joff;
+
+ static const uint8_t op8[] = {
+ [GET_BPF_OP(BPF_JEQ)] = 0x74, /* JE */
+ [GET_BPF_OP(EBPF_JNE)] = 0x75, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = 0x77, /* JA */
+ [GET_BPF_OP(EBPF_JLT)] = 0x72, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = 0x73, /* JAE */
+ [GET_BPF_OP(EBPF_JLE)] = 0x76, /* JBE */
+ [GET_BPF_OP(EBPF_JSGT)] = 0x7F, /* JG */
+ [GET_BPF_OP(EBPF_JSLT)] = 0x7C, /* JL */
+ [GET_BPF_OP(EBPF_JSGE)] = 0x7D, /* JGE */
+ [GET_BPF_OP(EBPF_JSLE)] = 0x7E, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = 0x75, /* JNE */
+ };
+
+ static const uint8_t op32[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x84}, /* JE */
+ [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x85}, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x87}, /* JA */
+ [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x82}, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x83}, /* JAE */
+ [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x86}, /* JBE */
+ [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x8F}, /* JG */
+ [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x8C}, /* JL */
+ [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x8D}, /* JGE */
+ [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x8E}, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x85}, /* JNE */
+ };
+
+ const int32_t sz8 = sizeof(op8[0]) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32[0]) + sizeof(uint32_t);
+
+ /* max possible jcc instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
+ joff = ofs - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ bop = GET_BPF_OP(op);
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8[bop], sizeof(op8[bop]));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, op32[bop], sizeof(op32[bop]));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ */
+static void
+emit_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+ emit_abs_jcc(st, op, st->off[st->idx + ofs]);
+}
+
+
+/*
+ * emit cmp <imm>, %<dreg>
+ */
+static void
+emit_cmp_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t ops;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ const uint8_t mods = 7;
+
+ imsz = imm_size(imm);
+ ops = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit test <imm>, %<dreg>
+ */
+static void
+emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 0;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+static void
+emit_jcc_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_imm(st, EBPF_ALU64, dreg, imm);
+ else
+ emit_cmp_imm(st, EBPF_ALU64, dreg, imm);
+
+ emit_jcc(st, op, ofs);
+}
+
+/*
+ * emit test %<sreg>, %<dreg>
+ */
+static void
+emit_tst_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x85;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit cmp %<sreg>, %<dreg>
+ */
+static void
+emit_cmp_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x39;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+
+}
+
+static void
+emit_jcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_reg(st, EBPF_ALU64, sreg, dreg);
+ else
+ emit_cmp_reg(st, EBPF_ALU64, sreg, dreg);
+
+ emit_jcc(st, op, ofs);
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some register spilling is necessary.
+ * emit:
+ * mov %rax, %r11
+ * mov %rdx, %r10
+ * mov %<dreg>, %rax
+ * xor %rdx, %rdx
+ * for divisor as immediate value:
+ *   mov <imm>, %r9
+ * div %<divisor_reg>
+ * either (BPF_DIV):
+ *   mov %rax, %<dreg>
+ * or (BPF_MOD):
+ *   mov %rdx, %<dreg>
+ * mov %r11, %rax
+ * mov %r10, %rdx
+ */
+static void
+emit_div(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ uint32_t sr;
+
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 6;
+
+ if (BPF_SRC(op) == BPF_X) {
+
+ /* check that src divisor is not zero */
+ emit_tst_reg(st, BPF_CLASS(op), sreg, sreg);
+
+ /* exit with return value zero */
+ emit_movcc_reg(st, BPF_CLASS(op) | BPF_JEQ | BPF_X, sreg, RAX);
+ emit_abs_jcc(st, BPF_JMP | BPF_JEQ | BPF_K, st->exit.off);
+ }
+
+ /* save rax & rdx */
+ if (dreg != RAX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+ if (dreg != RDX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* fill rax & rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, 0);
+
+ if (BPF_SRC(op) == BPF_X) {
+ sr = sreg;
+ if (sr == RAX)
+ sr = REG_TMP0;
+ else if (sr == RDX)
+ sr = REG_TMP1;
+ } else {
+ sr = REG_DIV_IMM;
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, sr, imm);
+ }
+
+ emit_rex(st, op, 0, sr);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, sr);
+
+ if (BPF_OP(op) == BPF_DIV)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+ else
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, dreg);
+
+ if (dreg != RAX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+ if (dreg != RDX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+}
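+
+/*
+ * For illustration: for a register divisor the guard emitted above behaves
+ * like
+ *   test %<sreg>, %<sreg>
+ *   cmovz %<sreg>, %rax      return value becomes 0 (sreg is known zero)
+ *   je <epilog>              leave the program instead of faulting
+ * so an eBPF division by zero terminates the program with r0 == 0 rather
+ * than raising a divide error.
+ */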
+
+static void
+emit_prolog(struct bpf_jit_state *st, int32_t stack_size)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ /* we can avoid touching the stack at all */
+ if (spil == 0)
+ return;
+
+
+ emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_st_reg(st, BPF_STX | BPF_MEM | EBPF_DW,
+ save_regs[i], RSP, ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ if (INUSE(st->reguse, RBP) != 0) {
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RSP, RBP);
+ emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP, stack_size);
+ }
+}
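+
+/*
+ * For illustration: assuming RBX, R13 and RBP are in use and a 64B BPF
+ * stack, the prologue above emits roughly
+ *   sub $24, %rsp            3 spill slots
+ *   mov %rbx, 0(%rsp)
+ *   mov %r13, 8(%rsp)
+ *   mov %rbp, 16(%rsp)
+ *   mov %rsp, %rbp           RBP backs EBPF_REG_10, the BPF frame pointer
+ *   sub $64, %rsp            reserve the BPF stack
+ * and the epilogue below restores the registers in the same order.
+ */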
+
+/*
+ * emit ret
+ */
+static void
+emit_ret(struct bpf_jit_state *st)
+{
+ const uint8_t ops = 0xC3;
+
+ emit_bytes(st, &ops, sizeof(ops));
+}
+
+static void
+emit_epilog(struct bpf_jit_state *st)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+ /* if we already have an epilog, generate a jump to it */
+ if (st->exit.num++ != 0) {
+ emit_abs_jmp(st, st->exit.off);
+ return;
+ }
+
+ /* store offset of epilog block */
+ st->exit.off = st->sz;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ if (spil != 0) {
+
+ if (INUSE(st->reguse, RBP) != 0)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+ RBP, RSP);
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_ld_reg(st, BPF_LDX | BPF_MEM | EBPF_DW,
+ RSP, save_regs[i], ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ emit_alu_imm(st, EBPF_ALU64 | BPF_ADD | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+ }
+
+ emit_ret(st);
+}
+
+/*
+ * walk through the BPF code and translate it into x86_64 instructions.
+ */
+static int
+emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
+{
+ uint32_t i, dr, op, sr;
+ const struct ebpf_insn *ins;
+
+ /* reset state fields */
+ st->sz = 0;
+ st->exit.num = 0;
+
+ emit_prolog(st, bpf->stack_sz);
+
+ for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+ st->idx = i;
+ st->off[i] = st->sz;
+
+ ins = bpf->prm.ins + i;
+
+ dr = ebpf2x86[ins->dst_reg];
+ sr = ebpf2x86[ins->src_reg];
+ op = ins->code;
+
+ switch (op) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ case (BPF_ALU | BPF_AND | BPF_K):
+ case (BPF_ALU | BPF_OR | BPF_K):
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ case (BPF_ALU | BPF_AND | BPF_X):
+ case (BPF_ALU | BPF_OR | BPF_X):
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+ emit_be2le(st, dr, ins->imm);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+ emit_le2be(st, dr, ins->imm);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_K):
+ case (EBPF_ALU64 | BPF_SUB | BPF_K):
+ case (EBPF_ALU64 | BPF_AND | BPF_K):
+ case (EBPF_ALU64 | BPF_OR | BPF_K):
+ case (EBPF_ALU64 | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_K):
+ case (EBPF_ALU64 | BPF_RSH | BPF_K):
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 64 bit ALU REG operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_X):
+ case (EBPF_ALU64 | BPF_SUB | BPF_X):
+ case (EBPF_ALU64 | BPF_AND | BPF_X):
+ case (EBPF_ALU64 | BPF_OR | BPF_X):
+ case (EBPF_ALU64 | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_X):
+ case (EBPF_ALU64 | BPF_RSH | BPF_X):
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ /* multiply instructions */
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ case (EBPF_ALU64 | BPF_MUL | BPF_K):
+ case (EBPF_ALU64 | BPF_MUL | BPF_X):
+ emit_mul(st, op, sr, dr, ins->imm);
+ break;
+ /* divide instructions */
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ case (EBPF_ALU64 | BPF_DIV | BPF_K):
+ case (EBPF_ALU64 | BPF_MOD | BPF_K):
+ case (EBPF_ALU64 | BPF_DIV | BPF_X):
+ case (EBPF_ALU64 | BPF_MOD | BPF_X):
+ emit_div(st, op, sr, dr, ins->imm);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ case (BPF_LDX | BPF_MEM | EBPF_DW):
+ emit_ld_reg(st, op, sr, dr, ins->off);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ emit_ld_imm64(st, dr, ins[0].imm, ins[1].imm);
+ i++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ case (BPF_STX | BPF_MEM | BPF_H):
+ case (BPF_STX | BPF_MEM | BPF_W):
+ case (BPF_STX | BPF_MEM | EBPF_DW):
+ emit_st_reg(st, op, sr, dr, ins->off);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ case (BPF_ST | BPF_MEM | BPF_H):
+ case (BPF_ST | BPF_MEM | BPF_W):
+ case (BPF_ST | BPF_MEM | EBPF_DW):
+ emit_st_imm(st, op, dr, ins->imm, ins->off);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | EBPF_XADD | BPF_W):
+ case (BPF_STX | EBPF_XADD | EBPF_DW):
+ emit_st_xadd(st, op, sr, dr, ins->off);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ emit_jmp(st, ins->off + 1);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ emit_jcc_imm(st, op, dr, ins->imm, ins->off + 1);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ emit_jcc_reg(st, op, sr, dr, ins->off + 1);
+ break;
+ /* call instructions */
+ case (BPF_JMP | EBPF_CALL):
+ emit_call(st,
+ (uintptr_t)bpf->prm.xsym[ins->imm].func.val);
+ break;
+ /* return instruction */
+ case (BPF_JMP | EBPF_EXIT):
+ emit_epilog(st);
+ break;
+ default:
+ RTE_BPF_LOG(ERR,
+ "%s(%p): invalid opcode %#x at pc: %u;\n",
+ __func__, bpf, ins->code, i);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
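+
+/*
+ * For illustration of the two-pass scheme driving emit(): on the first dry
+ * run every not-yet-emitted target in st->off[] still reads INT32_MAX, so
+ * forward jumps start out with the long imm32 encoding; later passes re-run
+ * emit() with the offsets from the previous pass until the total size stops
+ * changing, and only then is memory allocated and the final pass actually
+ * writes the bytes into st->ins.
+ */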
+
+/*
+ * produce a native ISA version of the given BPF code.
+ */
+int
+bpf_jit_x86(struct rte_bpf *bpf)
+{
+ int32_t rc;
+ uint32_t i;
+ size_t sz;
+ struct bpf_jit_state st;
+
+ /* init state */
+ memset(&st, 0, sizeof(st));
+ st.off = malloc(bpf->prm.nb_ins * sizeof(st.off[0]));
+ if (st.off == NULL)
+ return -ENOMEM;
+
+ /* fill with fake offsets */
+ st.exit.off = INT32_MAX;
+ for (i = 0; i != bpf->prm.nb_ins; i++)
+ st.off[i] = INT32_MAX;
+
+ /*
+ * dry runs, used to calculate total code size and valid jump offsets.
+ * stop when we get minimal possible size
+ */
+ do {
+ sz = st.sz;
+ rc = emit(&st, bpf);
+ } while (rc == 0 && sz != st.sz);
+
+ if (rc == 0) {
+
+ /* allocate memory needed */
+ st.ins = mmap(NULL, st.sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (st.ins == MAP_FAILED)
+ rc = -ENOMEM;
+ else
+ /* generate code */
+ rc = emit(&st, bpf);
+ }
+
+ if (rc == 0 && mprotect(st.ins, st.sz, PROT_READ | PROT_EXEC) != 0)
+ rc = -ENOMEM;
+
+ if (rc != 0)
+ munmap(st.ins, st.sz);
+ else {
+ bpf->jit.func = (void *)st.ins;
+ bpf->jit.sz = st.sz;
+ }
+
+ free(st.off);
+ return rc;
+}
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
new file mode 100644
index 00000000..2b84fe72
--- /dev/null
+++ b/lib/librte_bpf/bpf_load.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+static struct rte_bpf *
+bpf_load(const struct rte_bpf_prm *prm)
+{
+ uint8_t *buf;
+ struct rte_bpf *bpf;
+ size_t sz, bsz, insz, xsz;
+
+ xsz = prm->nb_xsym * sizeof(prm->xsym[0]);
+ insz = prm->nb_ins * sizeof(prm->ins[0]);
+ bsz = sizeof(bpf[0]);
+ sz = insz + xsz + bsz;
+
+ buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buf == MAP_FAILED)
+ return NULL;
+
+ bpf = (void *)buf;
+ bpf->sz = sz;
+
+ memcpy(&bpf->prm, prm, sizeof(bpf->prm));
+
+ memcpy(buf + bsz, prm->xsym, xsz);
+ memcpy(buf + bsz + xsz, prm->ins, insz);
+
+ bpf->prm.xsym = (void *)(buf + bsz);
+ bpf->prm.ins = (void *)(buf + bsz + xsz);
+
+ return bpf;
+}
+
+/*
+ * Check that a user-provided external symbol is valid.
+ */
+static int
+bpf_check_xsym(const struct rte_bpf_xsym *xsym)
+{
+ uint32_t i;
+
+ if (xsym->name == NULL)
+ return -EINVAL;
+
+ if (xsym->type == RTE_BPF_XTYPE_VAR) {
+ if (xsym->var.desc.type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ } else if (xsym->type == RTE_BPF_XTYPE_FUNC) {
+
+ if (xsym->func.nb_args > EBPF_FUNC_MAX_ARGS)
+ return -EINVAL;
+
+ /* check function arguments */
+ for (i = 0; i != xsym->func.nb_args; i++) {
+ if (xsym->func.args[i].type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ }
+
+ /* check return value info */
+ if (xsym->func.ret.type != RTE_BPF_ARG_UNDEF &&
+ xsym->func.ret.size == 0)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_load(const struct rte_bpf_prm *prm)
+{
+ struct rte_bpf *bpf;
+ int32_t rc;
+ uint32_t i;
+
+ if (prm == NULL || prm->ins == NULL ||
+ (prm->nb_xsym != 0 && prm->xsym == NULL)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ rc = 0;
+ for (i = 0; i != prm->nb_xsym && rc == 0; i++)
+ rc = bpf_check_xsym(prm->xsym + i);
+
+ if (rc != 0) {
+ rte_errno = -rc;
+ RTE_BPF_LOG(ERR, "%s: %d-th xsym is invalid\n", __func__, i);
+ return NULL;
+ }
+
+ bpf = bpf_load(prm);
+ if (bpf == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ rc = bpf_validate(bpf);
+ if (rc == 0) {
+ bpf_jit(bpf);
+ if (mprotect(bpf, bpf->sz, PROT_READ) != 0)
+ rc = -ENOMEM;
+ }
+
+ if (rc != 0) {
+ rte_bpf_destroy(bpf);
+ rte_errno = -rc;
+ return NULL;
+ }
+
+ return bpf;
+}
+
+__rte_experimental __attribute__ ((weak)) struct rte_bpf *
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname)
+{
+ if (prm == NULL || fname == NULL || sname == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ RTE_BPF_LOG(ERR, "%s() is not supported with current config\n"
+ "rebuild with libelf installed\n",
+ __func__);
+ rte_errno = ENOTSUP;
+ return NULL;
+}
diff --git a/lib/librte_bpf/bpf_load_elf.c b/lib/librte_bpf/bpf_load_elf.c
new file mode 100644
index 00000000..96d3630f
--- /dev/null
+++ b/lib/librte_bpf/bpf_load_elf.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <fcntl.h>
+
+#include <libelf.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+/* define EM_BPF here to overcome a compatibility issue with older ELF headers */
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+static uint32_t
+bpf_find_xsym(const char *sn, enum rte_bpf_xtype type,
+ const struct rte_bpf_xsym fp[], uint32_t fn)
+{
+ uint32_t i;
+
+ if (sn == NULL || fp == NULL)
+ return UINT32_MAX;
+
+ for (i = 0; i != fn; i++) {
+ if (fp[i].type == type && strcmp(sn, fp[i].name) == 0)
+ break;
+ }
+
+ return (i != fn) ? i : UINT32_MAX;
+}
+
+/*
+ * update BPF code at offset *ofs* with a proper address (index) for
+ * external symbol *sn*
+ */
+static int
+resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz,
+ const struct rte_bpf_prm *prm)
+{
+ uint32_t idx, fidx;
+ enum rte_bpf_xtype type;
+
+ if (ofs % sizeof(ins[0]) != 0 || ofs >= ins_sz)
+ return -EINVAL;
+
+ idx = ofs / sizeof(ins[0]);
+ if (ins[idx].code == (BPF_JMP | EBPF_CALL))
+ type = RTE_BPF_XTYPE_FUNC;
+ else if (ins[idx].code == (BPF_LD | BPF_IMM | EBPF_DW) &&
+ ofs < ins_sz - sizeof(ins[idx]))
+ type = RTE_BPF_XTYPE_VAR;
+ else
+ return -EINVAL;
+
+ fidx = bpf_find_xsym(sn, type, prm->xsym, prm->nb_xsym);
+ if (fidx == UINT32_MAX)
+ return -ENOENT;
+
+ /* for function we just need an index in our xsym table */
+ if (type == RTE_BPF_XTYPE_FUNC)
+ ins[idx].imm = fidx;
+ /* for variable we need to store its absolute address */
+ else {
+ ins[idx].imm = (uintptr_t)prm->xsym[fidx].var.val;
+ ins[idx + 1].imm =
+ (uint64_t)(uintptr_t)prm->xsym[fidx].var.val >> 32;
+ }
+
+ return 0;
+}
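+
+/*
+ * For illustration: for an external variable the patched instruction is the
+ * 16-byte 'lddw' (BPF_LD | BPF_IMM | EBPF_DW): the low 32 bits of the
+ * variable's address go into ins[idx].imm and the high 32 bits into
+ * ins[idx + 1].imm, the same split that emit_ld_imm64() reassembles later.
+ * For a function call only ins[idx].imm is rewritten and simply holds the
+ * index of the symbol within the xsym table.
+ */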
+
+static int
+check_elf_header(const Elf64_Ehdr *eh)
+{
+ const char *err;
+
+ err = NULL;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (eh->e_ident[EI_DATA] != ELFDATA2LSB)
+#else
+ if (eh->e_ident[EI_DATA] != ELFDATA2MSB)
+#endif
+ err = "not native byte order";
+ else if (eh->e_ident[EI_OSABI] != ELFOSABI_NONE)
+ err = "unexpected OS ABI";
+ else if (eh->e_type != ET_REL)
+ err = "unexpected ELF type";
+ else if (eh->e_machine != EM_NONE && eh->e_machine != EM_BPF)
+ err = "unexpected machine type";
+
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s(): %s\n", __func__, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find executable section by name.
+ */
+static int
+find_elf_code(Elf *elf, const char *section, Elf_Data **psd, size_t *pidx)
+{
+ Elf_Scn *sc;
+ const Elf64_Ehdr *eh;
+ const Elf64_Shdr *sh;
+ Elf_Data *sd;
+ const char *sn;
+ int32_t rc;
+
+ eh = elf64_getehdr(elf);
+ if (eh == NULL) {
+ rc = elf_errno();
+ RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ if (check_elf_header(eh) != 0)
+ return -EINVAL;
+
+ /* find given section by name */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL;
+ sc = elf_nextscn(elf, sc)) {
+ sh = elf64_getshdr(sc);
+ sn = elf_strptr(elf, eh->e_shstrndx, sh->sh_name);
+ if (sn != NULL && strcmp(section, sn) == 0 &&
+ sh->sh_type == SHT_PROGBITS &&
+ sh->sh_flags == (SHF_ALLOC | SHF_EXECINSTR))
+ break;
+ }
+
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(struct ebpf_insn) != 0) {
+ rc = elf_errno();
+ RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ *psd = sd;
+ *pidx = elf_ndxscn(sc);
+ return 0;
+}
+
+/*
+ * helper function to process data from relocation table.
+ */
+static int
+process_reloc(Elf *elf, size_t sym_idx, Elf64_Rel *re, size_t re_sz,
+ struct ebpf_insn *ins, size_t ins_sz, const struct rte_bpf_prm *prm)
+{
+ int32_t rc;
+ uint32_t i, n;
+ size_t ofs, sym;
+ const char *sn;
+ const Elf64_Ehdr *eh;
+ Elf_Scn *sc;
+ const Elf_Data *sd;
+ Elf64_Sym *sm;
+
+ eh = elf64_getehdr(elf);
+
+ /* get symtable by section index */
+ sc = elf_getscn(elf, sym_idx);
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL)
+ return -EINVAL;
+ sm = sd->d_buf;
+
+ n = re_sz / sizeof(re[0]);
+ for (i = 0; i != n; i++) {
+
+ ofs = re[i].r_offset;
+
+ /* retrieve index in the symtable */
+ sym = ELF64_R_SYM(re[i].r_info);
+ if (sym * sizeof(sm[0]) >= sd->d_size)
+ return -EINVAL;
+
+ sn = elf_strptr(elf, eh->e_shstrndx, sm[sym].st_name);
+
+ rc = resolve_xsym(sn, ofs, ins, ins_sz, prm);
+ if (rc != 0) {
+ RTE_BPF_LOG(ERR,
+ "resolve_xsym(%s, %zu) error code: %d\n",
+ sn, ofs, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find relocation information (if any)
+ * and update bpf code.
+ */
+static int
+elf_reloc_code(Elf *elf, Elf_Data *ed, size_t sidx,
+ const struct rte_bpf_prm *prm)
+{
+ Elf64_Rel *re;
+ Elf_Scn *sc;
+ const Elf64_Shdr *sh;
+ const Elf_Data *sd;
+ int32_t rc;
+
+ rc = 0;
+
+ /* walk through all sections */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL && rc == 0;
+ sc = elf_nextscn(elf, sc)) {
+
+ sh = elf64_getshdr(sc);
+
+ /* relocation data for our code section */
+ if (sh->sh_type == SHT_REL && sh->sh_info == sidx) {
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(re[0]) != 0)
+ return -EINVAL;
+ rc = process_reloc(elf, sh->sh_link,
+ sd->d_buf, sd->d_size, ed->d_buf, ed->d_size,
+ prm);
+ }
+ }
+
+ return rc;
+}
+
+static struct rte_bpf *
+bpf_load_elf(const struct rte_bpf_prm *prm, int32_t fd, const char *section)
+{
+ Elf *elf;
+ Elf_Data *sd;
+ size_t sidx;
+ int32_t rc;
+ struct rte_bpf *bpf;
+ struct rte_bpf_prm np;
+
+ elf_version(EV_CURRENT);
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+
+ rc = find_elf_code(elf, section, &sd, &sidx);
+ if (rc == 0)
+ rc = elf_reloc_code(elf, sd, sidx, prm);
+
+ if (rc == 0) {
+ np = prm[0];
+ np.ins = sd->d_buf;
+ np.nb_ins = sd->d_size / sizeof(struct ebpf_insn);
+ bpf = rte_bpf_load(&np);
+ } else {
+ bpf = NULL;
+ rte_errno = -rc;
+ }
+
+ elf_end(elf);
+ return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname)
+{
+ int32_t fd, rc;
+ struct rte_bpf *bpf;
+
+ if (prm == NULL || fname == NULL || sname == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ fd = open(fname, O_RDONLY);
+ if (fd < 0) {
+ rc = errno;
+ RTE_BPF_LOG(ERR, "%s(%s) error code: %d(%s)\n",
+ __func__, fname, rc, strerror(rc));
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ bpf = bpf_load_elf(prm, fd, sname);
+ close(fd);
+
+ if (bpf == NULL) {
+ RTE_BPF_LOG(ERR,
+ "%s(fname=\"%s\", sname=\"%s\") failed, "
+ "error code: %d\n",
+ __func__, fname, sname, rte_errno);
+ return NULL;
+ }
+
+ RTE_BPF_LOG(INFO, "%s(fname=\"%s\", sname=\"%s\") "
+ "successfully creates %p(jit={.func=%p,.sz=%zu});\n",
+ __func__, fname, sname, bpf, bpf->jit.func, bpf->jit.sz);
+ return bpf;
+}
diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c
new file mode 100644
index 00000000..ab9daa52
--- /dev/null
+++ b/lib/librte_bpf/bpf_pkt.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include <rte_bpf_ethdev.h>
+#include "bpf_impl.h"
+
+/*
+ * information about installed BPF rx/tx callback
+ */
+
+struct bpf_eth_cbi {
+ /* used by both data & control path */
+ uint32_t use; /* usage counter */
+ const struct rte_eth_rxtx_callback *cb; /* callback handle */
+ struct rte_bpf *bpf;
+ struct rte_bpf_jit jit;
+ /* used by control path only */
+ LIST_ENTRY(bpf_eth_cbi) link;
+ uint16_t port;
+ uint16_t queue;
+} __rte_cache_aligned;
+
+/*
+ * An odd counter value means that the callback is currently used by the
+ * datapath; an even value means that it is not.
+ */
+#define BPF_ETH_CBI_INUSE 1
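+
+/*
+ * For illustration: the counter starts at 0 (idle), bpf_eth_cbi_inuse()
+ * bumps it to an odd value for the duration of one RX/TX burst and
+ * bpf_eth_cbi_unuse() bumps it back to an even value. bpf_eth_cbi_wait()
+ * therefore only has to spin until the value changes when it observed an
+ * odd snapshot, which lets unload synchronise with the datapath without
+ * taking any lock on the fast path.
+ */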
+
+/*
+ * List to manage RX/TX installed callbacks.
+ */
+LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);
+
+enum {
+ BPF_ETH_RX,
+ BPF_ETH_TX,
+ BPF_ETH_NUM,
+};
+
+/*
+ * information about all installed BPF rx/tx callbacks
+ */
+struct bpf_eth_cbh {
+ rte_spinlock_t lock;
+ struct bpf_eth_cbi_list list;
+ uint32_t type;
+};
+
+static struct bpf_eth_cbh rx_cbh = {
+ .lock = RTE_SPINLOCK_INITIALIZER,
+ .list = LIST_HEAD_INITIALIZER(list),
+ .type = BPF_ETH_RX,
+};
+
+static struct bpf_eth_cbh tx_cbh = {
+ .lock = RTE_SPINLOCK_INITIALIZER,
+ .list = LIST_HEAD_INITIALIZER(list),
+ .type = BPF_ETH_TX,
+};
+
+/*
+ * Marks given callback as used by datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
+{
+ cbi->use++;
+ /* make sure no store/load reordering could happen */
+ rte_smp_mb();
+}
+
+/*
+ * Marks given callback as not used by datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
+{
+ /* make sure all previous loads are completed */
+ rte_smp_rmb();
+ cbi->use++;
+}
+
+/*
+ * Waits till the datapath has finished using the given callback.
+ */
+static void
+bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+{
+ uint32_t nuse, puse;
+
+ /* make sure all previous loads and stores are completed */
+ rte_smp_mb();
+
+ puse = cbi->use;
+
+ /* in use, busy wait till current RX/TX iteration is finished */
+ if ((puse & BPF_ETH_CBI_INUSE) != 0) {
+ do {
+ rte_pause();
+ rte_compiler_barrier();
+ nuse = cbi->use;
+ } while (nuse == puse);
+ }
+}
+
+static void
+bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
+{
+ bc->bpf = NULL;
+ memset(&bc->jit, 0, sizeof(bc->jit));
+}
+
+static struct bpf_eth_cbi *
+bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *cbi;
+
+ LIST_FOREACH(cbi, &cbh->list, link) {
+ if (cbi->port == port && cbi->queue == queue)
+ break;
+ }
+ return cbi;
+}
+
+static struct bpf_eth_cbi *
+bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *cbi;
+
+ /* return an existing one */
+ cbi = bpf_eth_cbh_find(cbh, port, queue);
+ if (cbi != NULL)
+ return cbi;
+
+ cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
+ if (cbi != NULL) {
+ cbi->port = port;
+ cbi->queue = queue;
+ LIST_INSERT_HEAD(&cbh->list, cbi, link);
+ }
+ return cbi;
+}
+
+/*
+ * BPF packet processing routines.
+ */
+
+static inline uint32_t
+apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i, j, k;
+ struct rte_mbuf *dr[num];
+
+ for (i = 0, j = 0, k = 0; i != num; i++) {
+
+ /* filter matches */
+ if (rc[i] != 0)
+ mb[j++] = mb[i];
+ /* no match */
+ else
+ dr[k++] = mb[i];
+ }
+
+ if (drop != 0) {
+ /* free filtered out mbufs */
+ for (i = 0; i != k; i++)
+ rte_pktmbuf_free(dr[i]);
+ } else {
+ /* copy filtered out mbufs beyond good ones */
+ for (i = 0; i != k; i++)
+ mb[j + i] = dr[i];
+ }
+
+ return j;
+}
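+
+/*
+ * For illustration: with num == 4 and rc == {1, 0, 1, 0} the two matching
+ * mbufs (originally mb[0] and mb[2]) are moved to the front and 2 is
+ * returned; with drop == 0 the rejected mbufs are copied behind them so the
+ * caller can still reach them, with drop != 0 they are freed via
+ * rte_pktmbuf_free().
+ */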
+
+static inline uint32_t
+pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i;
+ void *dp[num];
+ uint64_t rc[num];
+
+ for (i = 0; i != num; i++)
+ dp[i] = rte_pktmbuf_mtod(mb[i], void *);
+
+ rte_bpf_exec_burst(bpf, dp, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i, n;
+ void *dp;
+ uint64_t rc[num];
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ dp = rte_pktmbuf_mtod(mb[i], void *);
+ rc[i] = jit->func(dp);
+ n += (rc[i] == 0);
+ }
+
+ if (n != 0)
+ num = apply_filter(mb, rc, num, drop);
+
+ return num;
+}
+
+static inline uint32_t
+pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint64_t rc[num];
+
+ rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i, n;
+ uint64_t rc[num];
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ rc[i] = jit->func(mb[i]);
+ n += (rc[i] == 0);
+ }
+
+ if (n != 0)
+ num = apply_filter(mb, rc, num, drop);
+
+ return num;
+}
+
+/*
+ * RX/TX callbacks for raw data bpf.
+ */
+
+static uint16_t
+bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+/*
+ * RX/TX callbacks for mbuf.
+ */
+
+static uint16_t
+bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static rte_rx_callback_fn
+select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (type == RTE_BPF_ARG_PTR)
+ return bpf_rx_callback_jit;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_rx_callback_mb_jit;
+ } else if (type == RTE_BPF_ARG_PTR)
+ return bpf_rx_callback_vm;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_rx_callback_mb_vm;
+
+ return NULL;
+}
+
+static rte_tx_callback_fn
+select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (type == RTE_BPF_ARG_PTR)
+ return bpf_tx_callback_jit;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_tx_callback_mb_jit;
+ } else if (type == RTE_BPF_ARG_PTR)
+ return bpf_tx_callback_vm;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_tx_callback_mb_vm;
+
+ return NULL;
+}
+
+/*
+ * helper function to perform BPF unload for given port/queue.
+ * We have to introduce extra complexity (and a possible slowdown) here,
+ * as right now there is no safe generic way to remove an RX/TX callback
+ * while IO is active.
+ * We still don't free the memory allocated for the callback handle itself;
+ * again, right now there is no safe way to do that without stopping RX/TX
+ * on the given port/queue first.
+ */
+static void
+bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
+{
+ /* mark this cbi as empty */
+ bc->cb = NULL;
+ rte_smp_mb();
+
+ /* make sure datapath doesn't use bpf anymore, then destroy bpf */
+ bpf_eth_cbi_wait(bc);
+ rte_bpf_destroy(bc->bpf);
+ bpf_eth_cbi_cleanup(bc);
+}
+
+static void
+bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *bc;
+
+ bc = bpf_eth_cbh_find(cbh, port, queue);
+ if (bc == NULL || bc->cb == NULL)
+ return;
+
+ if (cbh->type == BPF_ETH_RX)
+ rte_eth_remove_rx_callback(port, queue, bc->cb);
+ else
+ rte_eth_remove_tx_callback(port, queue, bc->cb);
+
+ bpf_eth_cbi_unload(bc);
+}
+
+
+__rte_experimental void
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &rx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ bpf_eth_unload(cbh, port, queue);
+ rte_spinlock_unlock(&cbh->lock);
+}
+
+__rte_experimental void
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &tx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ bpf_eth_unload(cbh, port, queue);
+ rte_spinlock_unlock(&cbh->lock);
+}
+
+static int
+bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbi *bc;
+ struct rte_bpf *bpf;
+ rte_rx_callback_fn frx;
+ rte_tx_callback_fn ftx;
+ struct rte_bpf_jit jit;
+
+ frx = NULL;
+ ftx = NULL;
+
+ if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
+ queue >= RTE_MAX_QUEUES_PER_PORT)
+ return -EINVAL;
+
+ if (cbh->type == BPF_ETH_RX)
+ frx = select_rx_callback(prm->prog_arg.type, flags);
+ else
+ ftx = select_tx_callback(prm->prog_arg.type, flags);
+
+ if (frx == NULL && ftx == NULL) {
+ RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
+ __func__, port, queue);
+ return -EINVAL;
+ }
+
+ bpf = rte_bpf_elf_load(prm, fname, sname);
+ if (bpf == NULL)
+ return -rte_errno;
+
+ rte_bpf_get_jit(bpf, &jit);
+
+ if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
+ RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
+ __func__, port, queue);
+ rte_bpf_destroy(bpf);
+ return -ENOTSUP;
+ }
+
+ /* setup/update global callback info */
+ bc = bpf_eth_cbh_add(cbh, port, queue);
+ if (bc == NULL)
+ return -ENOMEM;
+
+ /* remove old one, if any */
+ if (bc->cb != NULL)
+ bpf_eth_unload(cbh, port, queue);
+
+ bc->bpf = bpf;
+ bc->jit = jit;
+
+ if (cbh->type == BPF_ETH_RX)
+ bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
+ else
+ bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);
+
+ if (bc->cb == NULL) {
+ rc = -rte_errno;
+ rte_bpf_destroy(bpf);
+ bpf_eth_cbi_cleanup(bc);
+ } else
+ rc = 0;
+
+ return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &rx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
+ rte_spinlock_unlock(&cbh->lock);
+
+ return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &tx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
+ rte_spinlock_unlock(&cbh->lock);
+
+ return rc;
+}
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
new file mode 100644
index 00000000..83983efc
--- /dev/null
+++ b/lib/librte_bpf/bpf_validate.c
@@ -0,0 +1,2248 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+struct bpf_reg_val {
+ struct rte_bpf_arg v;
+ uint64_t mask;
+ struct {
+ int64_t min;
+ int64_t max;
+ } s;
+ struct {
+ uint64_t min;
+ uint64_t max;
+ } u;
+};
+
+struct bpf_eval_state {
+ struct bpf_reg_val rv[EBPF_REG_NUM];
+ struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+};
+
+/* possible instruction node colour */
+enum {
+ WHITE,
+ GREY,
+ BLACK,
+ MAX_NODE_COLOUR
+};
+
+/* possible edge types */
+enum {
+ UNKNOWN_EDGE,
+ TREE_EDGE,
+ BACK_EDGE,
+ CROSS_EDGE,
+ MAX_EDGE_TYPE
+};
+
+#define MAX_EDGES 2
+
+struct inst_node {
+ uint8_t colour;
+ uint8_t nb_edge:4;
+ uint8_t cur_edge:4;
+ uint8_t edge_type[MAX_EDGES];
+ uint32_t edge_dest[MAX_EDGES];
+ uint32_t prev_node;
+ struct bpf_eval_state *evst;
+};
+
+struct bpf_verifier {
+ const struct rte_bpf_prm *prm;
+ struct inst_node *in;
+ uint64_t stack_sz;
+ uint32_t nb_nodes;
+ uint32_t nb_jcc_nodes;
+ uint32_t node_colour[MAX_NODE_COLOUR];
+ uint32_t edge_type[MAX_EDGE_TYPE];
+ struct bpf_eval_state *evst;
+ struct inst_node *evin;
+ struct {
+ uint32_t num;
+ uint32_t cur;
+ struct bpf_eval_state *ent;
+ } evst_pool;
+};
+
+struct bpf_ins_check {
+ struct {
+ uint16_t dreg;
+ uint16_t sreg;
+ } mask;
+ struct {
+ uint16_t min;
+ uint16_t max;
+ } off;
+ struct {
+ uint32_t min;
+ uint32_t max;
+ } imm;
+ const char * (*check)(const struct ebpf_insn *);
+ const char * (*eval)(struct bpf_verifier *, const struct ebpf_insn *);
+};
+
+#define ALL_REGS RTE_LEN2MASK(EBPF_REG_NUM, uint16_t)
+#define WRT_REGS RTE_LEN2MASK(EBPF_REG_10, uint16_t)
+#define ZERO_REG RTE_LEN2MASK(EBPF_REG_1, uint16_t)
+
+/*
+ * check and evaluate functions for particular instruction types.
+ */
+
+static const char *
+check_alu_bele(const struct ebpf_insn *ins)
+{
+ if (ins->imm != 16 && ins->imm != 32 && ins->imm != 64)
+ return "invalid imm field";
+ return NULL;
+}
+
+static const char *
+eval_exit(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ RTE_SET_USED(ins);
+ if (bvf->evst->rv[EBPF_REG_0].v.type == RTE_BPF_ARG_UNDEF)
+ return "undefined return value";
+ return NULL;
+}
+
+/* set up the max possible bounds allowed by this mask */
+static void
+eval_umax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->u.max = mask;
+ rv->u.min = 0;
+}
+
+static void
+eval_smax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->s.max = mask >> 1;
+ rv->s.min = rv->s.max ^ UINT64_MAX;
+}
+
+static void
+eval_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+ eval_smax_bound(rv, mask);
+}
+
+static void
+eval_fill_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_max_bound(rv, mask);
+ rv->v.type = RTE_BPF_ARG_RAW;
+ rv->mask = mask;
+}
+
+static void
+eval_fill_imm64(struct bpf_reg_val *rv, uint64_t mask, uint64_t val)
+{
+ rv->mask = mask;
+ rv->s.min = val;
+ rv->s.max = val;
+ rv->u.min = val;
+ rv->u.max = val;
+}
+
+static void
+eval_fill_imm(struct bpf_reg_val *rv, uint64_t mask, int32_t imm)
+{
+ uint64_t v;
+
+ v = (uint64_t)imm & mask;
+
+ rv->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rv, mask, v);
+}
+
+static const char *
+eval_ld_imm64(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t i;
+ uint64_t val;
+ struct bpf_reg_val *rd;
+
+ val = (uint32_t)ins[0].imm | (uint64_t)(uint32_t)ins[1].imm << 32;
+
+ rd = bvf->evst->rv + ins->dst_reg;
+ rd->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rd, UINT64_MAX, val);
+
+ for (i = 0; i != bvf->prm->nb_xsym; i++) {
+
+ /* load of external variable */
+ if (bvf->prm->xsym[i].type == RTE_BPF_XTYPE_VAR &&
+ (uintptr_t)bvf->prm->xsym[i].var.val == val) {
+ rd->v = bvf->prm->xsym[i].var.desc;
+ eval_fill_imm64(rd, UINT64_MAX, 0);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+eval_apply_mask(struct bpf_reg_val *rv, uint64_t mask)
+{
+ struct bpf_reg_val rt;
+
+ rt.u.min = rv->u.min & mask;
+ rt.u.max = rv->u.max & mask;
+ if (rt.u.min != rv->u.min || rt.u.max != rv->u.max) {
+ rv->u.max = RTE_MAX(rt.u.max, mask);
+ rv->u.min = 0;
+ }
+
+ eval_smax_bound(&rt, mask);
+ rv->s.max = RTE_MIN(rt.s.max, rv->s.max);
+ rv->s.min = RTE_MAX(rt.s.min, rv->s.min);
+
+ rv->mask = mask;
+}
+
+static void
+eval_add(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min + rs->u.min) & msk;
+ rv.u.max = (rd->u.min + rs->u.max) & msk;
+ rv.s.min = (rd->s.min + rs->s.min) & msk;
+ rv.s.max = (rd->s.max + rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min < rd->u.min || rv.u.max < rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min > rd->s.min) ||
+ rv.s.min < rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max > rd->s.max) ||
+ rv.s.max < rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_sub(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min - rs->u.min) & msk;
+ rv.u.max = (rd->u.min - rs->u.max) & msk;
+ rv.s.min = (rd->s.min - rs->s.min) & msk;
+ rv.s.max = (rd->s.max - rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min > rd->u.min || rv.u.max > rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min < rd->s.min) ||
+ rv.s.min > rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max < rd->s.max) ||
+ rv.s.max > rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_lsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ /* check for overflow */
+ if (rd->u.max > RTE_LEN2MASK(opsz - rs->u.max, uint64_t))
+ eval_umax_bound(rd, msk);
+ else {
+ rd->u.max <<= rs->u.max;
+ rd->u.min <<= rs->u.min;
+ }
+
+ /* check that dreg values are non-negative and will remain so */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0 || rd->s.max >=
+ RTE_LEN2MASK(opsz - rs->u.max - 1, int64_t))
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max <<= rs->u.max;
+ rd->s.min <<= rs->u.min;
+ }
+}
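+
+/*
+ * Illustrative example (opsz == 64, msk == UINT64_MAX): assuming both the
+ * unsigned and signed ranges of rd are [1, 0x10] and the shift count
+ * rs->u is [0, 4], no overflow is possible
+ * (0x10 <= RTE_LEN2MASK(60, uint64_t)), so the resulting bounds are
+ * [1 << 0, 0x10 << 4] == [1, 0x100].
+ */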
+
+static void
+eval_rsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max >>= rs->u.min;
+ rd->u.min >>= rs->u.max;
+
+ /* check that dreg values are always positive */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0)
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max >>= rs->u.min;
+ rd->s.min >>= rs->u.max;
+ }
+}
+
+static void
+eval_arsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ uint32_t shv;
+
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max = (int64_t)rd->u.max >> rs->u.min;
+ rd->u.min = (int64_t)rd->u.min >> rs->u.max;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min <<= opsz;
+ rd->s.max <<= opsz;
+ shv = opsz;
+ } else
+ shv = 0;
+
+ if (rd->s.min < 0)
+ rd->s.min = (rd->s.min >> (rs->u.min + shv)) & msk;
+ else
+ rd->s.min = (rd->s.min >> (rs->u.max + shv)) & msk;
+
+ if (rd->s.max < 0)
+ rd->s.max = (rd->s.max >> (rs->u.max + shv)) & msk;
+ else
+ rd->s.max = (rd->s.max >> (rs->u.min + shv)) & msk;
+}
+
+static uint64_t
+eval_umax_bits(uint64_t v, size_t opsz)
+{
+ if (v == 0)
+ return 0;
+
+ v = __builtin_clzll(v);
+ return RTE_LEN2MASK(opsz - v, uint64_t);
+}
+
+/* estimate max possible value for (v1 & v2) */
+static uint64_t
+eval_uand_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 & v2);
+}
+
+/* estimate max possible value for (v1 | v2) */
+static uint64_t
+eval_uor_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 | v2);
+}
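+
+/*
+ * Worked example (illustrative only, opsz == 64):
+ * eval_umax_bits(0x1234, 64): the highest set bit of 0x1234 is bit 12,
+ * so __builtin_clzll() returns 51 and the result is
+ * RTE_LEN2MASK(13, uint64_t) == 0x1fff.
+ * Hence eval_uand_max(0x1234, 0xff, 64) == (0x1fff & 0xff) == 0xff and
+ * eval_uor_max(0x1234, 0xff, 64) == (0x1fff | 0xff) == 0x1fff.
+ */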
+
+static void
+eval_and(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min &= rs->u.min;
+ rd->u.max &= rs->u.max;
+ } else {
+ rd->u.max = eval_uand_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min &= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min &= rs->s.min;
+ rd->s.max &= rs->s.max;
+ /* at least one of the operands is non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uand_max(rd->s.max & (msk >> 1),
+ rs->s.max & (msk >> 1), opsz);
+ rd->s.min &= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_or(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min |= rs->u.min;
+ rd->u.max |= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min |= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min |= rs->s.min;
+ rd->s.max |= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min |= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_xor(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min ^= rs->u.min;
+ rd->u.max ^= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min = 0;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min ^= rs->s.min;
+ rd->s.max ^= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min = 0;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_mul(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min = (rd->u.min * rs->u.min) & msk;
+ rd->u.max = (rd->u.max * rs->u.max) & msk;
+ /* check for overflow */
+ } else if (rd->u.max <= msk >> opsz / 2 && rs->u.max <= msk >> opsz) {
+ rd->u.max *= rs->u.max;
+ rd->u.min *= rd->u.min;
+ } else
+ eval_umax_bound(rd, msk);
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min = (rd->s.min * rs->s.min) & msk;
+ rd->s.max = (rd->s.max * rs->s.max) & msk;
+ /* check that both operands are positive and no overflow */
+ } else if (rd->s.min >= 0 && rs->s.min >= 0) {
+ rd->s.max *= rs->s.max;
+ rd->s.min *= rd->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static const char *
+eval_divmod(uint32_t op, struct bpf_reg_val *rd, struct bpf_reg_val *rs,
+ size_t opsz, uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ if (rs->u.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->u.min /= rs->u.min;
+ rd->u.max /= rs->u.max;
+ } else {
+ rd->u.min %= rs->u.min;
+ rd->u.max %= rs->u.max;
+ }
+ } else {
+ if (op == BPF_MOD)
+ rd->u.max = RTE_MIN(rd->u.max, rs->u.max - 1);
+ else
+ rd->u.max = rd->u.max;
+ rd->u.min = 0;
+ }
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ rs->s.min = (int32_t)rs->s.min;
+ rs->s.max = (int32_t)rs->s.max;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ if (rs->s.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->s.min /= rs->s.min;
+ rd->s.max /= rs->s.max;
+ } else {
+ rd->s.min %= rs->s.min;
+ rd->s.max %= rs->s.max;
+ }
+ } else if (op == BPF_MOD) {
+ rd->s.min = RTE_MAX(rd->s.max, 0);
+ rd->s.min = RTE_MIN(rd->s.min, 0);
+ } else
+ eval_smax_bound(rd, msk);
+
+ rd->s.max &= msk;
+ rd->s.min &= msk;
+
+ return NULL;
+}
+
+static void
+eval_neg(struct bpf_reg_val *rd, size_t opsz, uint64_t msk)
+{
+ uint64_t ux, uy;
+ int64_t sx, sy;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->u.min = (int32_t)rd->u.min;
+ rd->u.max = (int32_t)rd->u.max;
+ }
+
+ ux = -(int64_t)rd->u.min & msk;
+ uy = -(int64_t)rd->u.max & msk;
+
+ rd->u.max = RTE_MAX(ux, uy);
+ rd->u.min = RTE_MIN(ux, uy);
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ }
+
+ sx = -rd->s.min & msk;
+ sy = -rd->s.max & msk;
+
+ rd->s.max = RTE_MAX(sx, sy);
+ rd->s.min = RTE_MIN(sx, sy);
+}
+
+/*
+ * check that destination and source operands are in a defined state.
+ */
+static const char *
+eval_defined(const struct bpf_reg_val *dst, const struct bpf_reg_val *src)
+{
+ if (dst != NULL && dst->v.type == RTE_BPF_ARG_UNDEF)
+ return "dest reg value is undefined";
+ if (src != NULL && src->v.type == RTE_BPF_ARG_UNDEF)
+ return "src reg value is undefined";
+ return NULL;
+}
+
+static const char *
+eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t op;
+ size_t opsz;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+
+ opsz = (BPF_CLASS(ins->code) == BPF_ALU) ?
+ sizeof(uint32_t) : sizeof(uint64_t);
+ opsz = opsz * CHAR_BIT;
+ msk = RTE_LEN2MASK(opsz, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ eval_apply_mask(rd, msk);
+
+ op = BPF_OP(ins->code);
+
+ err = eval_defined((op != EBPF_MOV) ? rd : NULL,
+ (op != BPF_NEG) ? &rs : NULL);
+ if (err != NULL)
+ return err;
+
+ if (op == BPF_ADD)
+ eval_add(rd, &rs, msk);
+ else if (op == BPF_SUB)
+ eval_sub(rd, &rs, msk);
+ else if (op == BPF_LSH)
+ eval_lsh(rd, &rs, opsz, msk);
+ else if (op == BPF_RSH)
+ eval_rsh(rd, &rs, opsz, msk);
+ else if (op == EBPF_ARSH)
+ eval_arsh(rd, &rs, opsz, msk);
+ else if (op == BPF_AND)
+ eval_and(rd, &rs, opsz, msk);
+ else if (op == BPF_OR)
+ eval_or(rd, &rs, opsz, msk);
+ else if (op == BPF_XOR)
+ eval_xor(rd, &rs, opsz, msk);
+ else if (op == BPF_MUL)
+ eval_mul(rd, &rs, opsz, msk);
+ else if (op == BPF_DIV || op == BPF_MOD)
+ err = eval_divmod(op, rd, &rs, opsz, msk);
+ else if (op == BPF_NEG)
+ eval_neg(rd, opsz, msk);
+ else if (op == EBPF_MOV)
+ *rd = rs;
+ else
+ eval_max_bound(rd, msk);
+
+ return err;
+}
+
+static const char *
+eval_bele(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd;
+ const char *err;
+
+ msk = RTE_LEN2MASK(ins->imm, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ err = eval_defined(rd, NULL);
+ if (err != NULL)
+ return err;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_BE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#else
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_LE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#endif
+
+ return NULL;
+}
+
+static const char *
+eval_ptr(struct bpf_verifier *bvf, struct bpf_reg_val *rm, uint32_t opsz,
+ uint32_t align, int16_t off)
+{
+ struct bpf_reg_val rv;
+
+ /* calculate reg + offset */
+ eval_fill_imm(&rv, rm->mask, off);
+ eval_add(rm, &rv, rm->mask);
+
+ if (RTE_BPF_ARG_PTR_TYPE(rm->v.type) == 0)
+ return "destination is not a pointer";
+
+ if (rm->mask != UINT64_MAX)
+ return "pointer truncation";
+
+ if (rm->u.max + opsz > rm->v.size ||
+ (uint64_t)rm->s.max + opsz > rm->v.size ||
+ rm->s.min < 0)
+ return "memory boundary violation";
+
+ if (rm->u.max % align != 0)
+ return "unaligned memory access";
+
+ if (rm->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "stack access with variable offset";
+
+ bvf->stack_sz = RTE_MAX(bvf->stack_sz, rm->v.size - rm->u.max);
+
+ /* pointer to mbuf */
+ } else if (rm->v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "mbuf access with variable offset";
+ }
+
+ return NULL;
+}
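+
+/*
+ * Illustrative example: for a 4-byte access at constant offset 60 into a
+ * pointer argument with v.size == 64 and align == 1, the boundary check
+ * passes (60 + 4 <= 64), while offset 61 would be rejected as a
+ * "memory boundary violation" (61 + 4 > 64).
+ */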
+
+static void
+eval_max_load(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+
+ /* full 64-bit load */
+ if (mask == UINT64_MAX)
+ eval_smax_bound(rv, mask);
+
+ /* zero-extend load */
+ rv->s.min = rv->u.min;
+ rv->s.max = rv->u.max;
+}
+
+
+static const char *
+eval_load(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+ const struct bpf_reg_val *sv;
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+ rs = st->rv[ins->src_reg];
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ err = eval_ptr(bvf, &rs, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rs.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rs.u.max / sizeof(uint64_t);
+ if (sv->v.type == RTE_BPF_ARG_UNDEF || sv->mask < msk)
+ return "undefined value on the stack";
+
+ *rd = *sv;
+
+ /* pointer to mbuf */
+ } else if (rs.v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rs.u.max == offsetof(struct rte_mbuf, next)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v = rs.v;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, buf_addr)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v.type = RTE_BPF_ARG_PTR;
+ rd->v.size = rs.v.buf_size;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, data_off)) {
+ eval_fill_imm(rd, msk, RTE_PKTMBUF_HEADROOM);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ /* pointer to raw data */
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_mbuf_store(const struct bpf_reg_val *rv, uint32_t opsz)
+{
+ uint32_t i;
+
+ static const struct {
+ size_t off;
+ size_t sz;
+ } mbuf_ro_fields[] = {
+ { .off = offsetof(struct rte_mbuf, buf_addr), },
+ { .off = offsetof(struct rte_mbuf, refcnt), },
+ { .off = offsetof(struct rte_mbuf, nb_segs), },
+ { .off = offsetof(struct rte_mbuf, buf_len), },
+ { .off = offsetof(struct rte_mbuf, pool), },
+ { .off = offsetof(struct rte_mbuf, next), },
+ { .off = offsetof(struct rte_mbuf, priv_size), },
+ };
+
+ for (i = 0; i != RTE_DIM(mbuf_ro_fields) &&
+ (mbuf_ro_fields[i].off + mbuf_ro_fields[i].sz <=
+ rv->u.max || rv->u.max + opsz <= mbuf_ro_fields[i].off);
+ i++)
+ ;
+
+ if (i != RTE_DIM(mbuf_ro_fields))
+ return "store to the read-only mbuf field";
+
+ return NULL;
+
+}
+
+static const char *
+eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val rd, rs, *sv;
+
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv[ins->dst_reg];
+
+ if (BPF_CLASS(ins->code) == BPF_STX) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ err = eval_defined(NULL, &rs);
+ if (err != NULL)
+ return err;
+
+ err = eval_ptr(bvf, &rd, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rd.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rd.u.max / sizeof(uint64_t);
+ if (BPF_CLASS(ins->code) == BPF_STX &&
+ BPF_MODE(ins->code) == EBPF_XADD)
+ eval_max_bound(sv, msk);
+ else
+ *sv = rs;
+
+ /* pointer to mbuf */
+ } else if (rd.v.type == RTE_BPF_ARG_PTR_MBUF) {
+ err = eval_mbuf_store(&rd, opsz);
+ if (err != NULL)
+ return err;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_func_arg(struct bpf_verifier *bvf, const struct rte_bpf_arg *arg,
+ struct bpf_reg_val *rv)
+{
+ uint32_t i, n;
+ struct bpf_eval_state *st;
+ const char *err;
+
+ st = bvf->evst;
+
+ if (rv->v.type == RTE_BPF_ARG_UNDEF)
+ return "Undefined argument type";
+
+ if (arg->type != rv->v.type &&
+ arg->type != RTE_BPF_ARG_RAW &&
+ (arg->type != RTE_BPF_ARG_PTR ||
+ RTE_BPF_ARG_PTR_TYPE(rv->v.type) == 0))
+ return "Invalid argument type";
+
+ err = NULL;
+
+ /* argument is a pointer */
+ if (RTE_BPF_ARG_PTR_TYPE(arg->type) != 0) {
+
+ err = eval_ptr(bvf, rv, arg->size, 1, 0);
+
+ /*
+ * pointer to the variable on the stack is passed
+ * as an argument, mark stack space it occupies as initialized.
+ */
+ if (err == NULL && rv->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ i = rv->u.max / sizeof(uint64_t);
+ n = i + arg->size / sizeof(uint64_t);
+ while (i != n) {
+ eval_fill_max_bound(st->sv + i, UINT64_MAX);
+ i++;
+ }
+ }
+ }
+
+ return err;
+}
+
+static const char *
+eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t i, idx;
+ struct bpf_reg_val *rv;
+ const struct rte_bpf_xsym *xsym;
+ const char *err;
+
+ idx = ins->imm;
+
+ if (idx >= bvf->prm->nb_xsym ||
+ bvf->prm->xsym[idx].type != RTE_BPF_XTYPE_FUNC)
+ return "invalid external function index";
+
+ /* for now don't support function calls on 32 bit platform */
+ if (sizeof(uint64_t) != sizeof(uintptr_t))
+ return "function calls are supported only for 64 bit apps";
+
+ xsym = bvf->prm->xsym + idx;
+
+ /* evaluate function arguments */
+ err = NULL;
+ for (i = 0; i != xsym->func.nb_args && err == NULL; i++) {
+ err = eval_func_arg(bvf, xsym->func.args + i,
+ bvf->evst->rv + EBPF_REG_1 + i);
+ }
+
+ /* R1-R5 argument/scratch registers */
+ for (i = EBPF_REG_1; i != EBPF_REG_6; i++)
+ bvf->evst->rv[i].v.type = RTE_BPF_ARG_UNDEF;
+
+ /* update return value */
+
+ rv = bvf->evst->rv + EBPF_REG_0;
+ rv->v = xsym->func.ret;
+ msk = (rv->v.type == RTE_BPF_ARG_RAW) ?
+ RTE_LEN2MASK(rv->v.size * CHAR_BIT, uint64_t) : UINTPTR_MAX;
+ eval_max_bound(rv, msk);
+ rv->mask = msk;
+
+ return err;
+}
+
+static void
+eval_jeq_jne(struct bpf_reg_val *trd, struct bpf_reg_val *trs)
+{
+ /* sreg is constant */
+ if (trs->u.min == trs->u.max) {
+ trd->u = trs->u;
+ /* dreg is constant */
+ } else if (trd->u.min == trd->u.max) {
+ trs->u = trd->u;
+ } else {
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min);
+ trs->u = trd->u;
+ }
+
+ /* sreg is constant */
+ if (trs->s.min == trs->s.max) {
+ trd->s = trs->s;
+ /* dreg is constant */
+ } else if (trd->s.min == trd->s.max) {
+ trs->s = trd->s;
+ } else {
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min);
+ trs->s = trd->s;
+ }
+}
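+
+/*
+ * Illustrative example: for a JEQ against the constant 5 with the dreg
+ * range [0, 100], the taken-branch state narrows the dreg to the constant
+ * [5, 5], while the fall-through state keeps the original range.
+ */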
+
+static void
+eval_jgt_jle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.max = RTE_MIN(frd->u.max, frs->u.min);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min + 1);
+}
+
+static void
+eval_jlt_jge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.min = RTE_MAX(frd->u.min, frs->u.min);
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max - 1);
+}
+
+static void
+eval_jsgt_jsle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.max = RTE_MIN(frd->s.max, frs->s.min);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min + 1);
+}
+
+static void
+eval_jslt_jsge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.min = RTE_MAX(frd->s.min, frs->s.min);
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max - 1);
+}
+
+static const char *
+eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t op;
+ const char *err;
+ struct bpf_eval_state *fst, *tst;
+ struct bpf_reg_val *frd, *frs, *trd, *trs;
+ struct bpf_reg_val rvf, rvt;
+
+ tst = bvf->evst;
+ fst = bvf->evin->evst;
+
+ frd = fst->rv + ins->dst_reg;
+ trd = tst->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ frs = fst->rv + ins->src_reg;
+ trs = tst->rv + ins->src_reg;
+ } else {
+ frs = &rvf;
+ trs = &rvt;
+ eval_fill_imm(frs, UINT64_MAX, ins->imm);
+ eval_fill_imm(trs, UINT64_MAX, ins->imm);
+ }
+
+ err = eval_defined(trd, trs);
+ if (err != NULL)
+ return err;
+
+ op = BPF_OP(ins->code);
+
+ if (op == BPF_JEQ)
+ eval_jeq_jne(trd, trs);
+ else if (op == EBPF_JNE)
+ eval_jeq_jne(frd, frs);
+ else if (op == BPF_JGT)
+ eval_jgt_jle(trd, trs, frd, frs);
+ else if (op == EBPF_JLE)
+ eval_jgt_jle(frd, frs, trd, trs);
+ else if (op == EBPF_JLT)
+ eval_jlt_jge(trd, trs, frd, frs);
+ else if (op == BPF_JGE)
+ eval_jlt_jge(frd, frs, trd, trs);
+ else if (op == EBPF_JSGT)
+ eval_jsgt_jsle(trd, trs, frd, frs);
+ else if (op == EBPF_JSLE)
+ eval_jsgt_jsle(frd, frs, trd, trs);
+ else if (op == EBPF_JSLT)
+ eval_jslt_jsge(trd, trs, frd, frs);
+ else if (op == EBPF_JSGE)
+ eval_jslt_jsge(frd, frs, trd, trs);
+
+ return NULL;
+}
+
+/*
+ * validate parameters for each instruction type.
+ */
+static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
+ /* ALU IMM 32-bit instructions */
+ [(BPF_ALU | BPF_ADD | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_SUB | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_AND | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_OR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_LSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_RSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_XOR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MUL | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_MOV | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_DIV | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MOD | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ /* ALU IMM 64-bit instructions */
+ [(EBPF_ALU64 | BPF_ADD | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_SUB | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_AND | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_OR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_LSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_RSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_ARSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_XOR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MUL | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_MOV | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_DIV | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MOD | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ /* ALU REG 32-bit instructions */
+ [(BPF_ALU | BPF_ADD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_SUB | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_AND | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_OR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_LSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_RSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_XOR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MUL | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_DIV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MOD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_MOV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_NEG)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_END | EBPF_TO_BE)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 16, .max = 64},
+ .check = check_alu_bele,
+ .eval = eval_bele,
+ },
+ [(BPF_ALU | EBPF_END | EBPF_TO_LE)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 16, .max = 64},
+ .check = check_alu_bele,
+ .eval = eval_bele,
+ },
+ /* ALU REG 64-bit instructions */
+ [(EBPF_ALU64 | BPF_ADD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_SUB | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_AND | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_OR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_LSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_RSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_ARSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_XOR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MUL | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_DIV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MOD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_MOV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_NEG)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ /* load instructions */
+ [(BPF_LDX | BPF_MEM | BPF_B)] = {
+ .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | BPF_H)] = {
+ .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | BPF_W)] = {
+ .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | EBPF_DW)] = {
+ .mask = {. dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ /* load 64 bit immediate value */
+ [(BPF_LD | BPF_IMM | EBPF_DW)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_ld_imm64,
+ },
+ /* store REG instructions */
+ [(BPF_STX | BPF_MEM | BPF_B)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | BPF_H)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ /* atomic add instructions */
+ [(BPF_STX | EBPF_XADD | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | EBPF_XADD | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ /* store IMM instructions */
+ [(BPF_ST | BPF_MEM | BPF_B)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | BPF_H)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ /* jump instruction */
+ [(BPF_JMP | BPF_JA)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ },
+ /* jcc IMM instructions */
+ [(BPF_JMP | BPF_JEQ | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JNE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JSET | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ /* jcc REG instructions */
+ [(BPF_JMP | BPF_JEQ | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JNE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JSET | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ /* call instruction */
+ [(BPF_JMP | EBPF_CALL)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_call,
+ },
+ /* ret instruction */
+ [(BPF_JMP | EBPF_EXIT)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_exit,
+ },
+};
+
+/*
+ * make sure that instruction syntax is valid,
+ * and its fields don't violate particular instruction type restrictions.
+ */
+static const char *
+check_syntax(const struct ebpf_insn *ins)
+{
+
+ uint8_t op;
+ uint16_t off;
+ uint32_t imm;
+
+ op = ins->code;
+
+ if (ins_chk[op].mask.dreg == 0)
+ return "invalid opcode";
+
+ if ((ins_chk[op].mask.dreg & 1 << ins->dst_reg) == 0)
+ return "invalid dst-reg field";
+
+ if ((ins_chk[op].mask.sreg & 1 << ins->src_reg) == 0)
+ return "invalid src-reg field";
+
+ off = ins->off;
+ if (ins_chk[op].off.min > off || ins_chk[op].off.max < off)
+ return "invalid off field";
+
+ imm = ins->imm;
+ if (ins_chk[op].imm.min > imm || ins_chk[op].imm.max < imm)
+ return "invalid imm field";
+
+ if (ins_chk[op].check != NULL)
+ return ins_chk[op].check(ins);
+
+ return NULL;
+}
+
+/*
+ * helper function, return instruction index for the given node.
+ */
+static uint32_t
+get_node_idx(const struct bpf_verifier *bvf, const struct inst_node *node)
+{
+ return node - bvf->in;
+}
+
+/*
+ * helper function, used to walk through constructed CFG.
+ */
+static struct inst_node *
+get_next_node(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ uint32_t ce, ne, dst;
+
+ ne = node->nb_edge;
+ ce = node->cur_edge;
+ if (ce == ne)
+ return NULL;
+
+ node->cur_edge++;
+ dst = node->edge_dest[ce];
+ return bvf->in + dst;
+}
+
+static void
+set_node_colour(struct bpf_verifier *bvf, struct inst_node *node,
+ uint32_t new)
+{
+ uint32_t prev;
+
+ prev = node->colour;
+ node->colour = new;
+
+ bvf->node_colour[prev]--;
+ bvf->node_colour[new]++;
+}
+
+/*
+ * helper function, add new edge between two nodes.
+ */
+static int
+add_edge(struct bpf_verifier *bvf, struct inst_node *node, uint32_t nidx)
+{
+ uint32_t ne;
+
+ if (nidx > bvf->prm->nb_ins) {
+ RTE_BPF_LOG(ERR, "%s: program boundary violation at pc: %u, "
+ "next pc: %u\n",
+ __func__, get_node_idx(bvf, node), nidx);
+ return -EINVAL;
+ }
+
+ ne = node->nb_edge;
+ if (ne >= RTE_DIM(node->edge_dest)) {
+ RTE_BPF_LOG(ERR, "%s: internal error at pc: %u\n",
+ __func__, get_node_idx(bvf, node));
+ return -EINVAL;
+ }
+
+ node->edge_dest[ne] = nidx;
+ node->nb_edge = ne + 1;
+ return 0;
+}
+
+/*
+ * helper function, determine type of edge between two nodes.
+ */
+static void
+set_edge_type(struct bpf_verifier *bvf, struct inst_node *node,
+ const struct inst_node *next)
+{
+ uint32_t ce, clr, type;
+
+ ce = node->cur_edge - 1;
+ clr = next->colour;
+
+ type = UNKNOWN_EDGE;
+
+ if (clr == WHITE)
+ type = TREE_EDGE;
+ else if (clr == GREY)
+ type = BACK_EDGE;
+ else if (clr == BLACK)
+ /*
+ * in fact it could be either direct or cross edge,
+ * but for now, we don't need to distinguish between them.
+ */
+ type = CROSS_EDGE;
+
+ node->edge_type[ce] = type;
+ bvf->edge_type[type]++;
+}
+
+static struct inst_node *
+get_prev_node(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ return bvf->in + node->prev_node;
+}
+
+/*
+ * Depth-First Search (DFS) through previously constructed
+ * Control Flow Graph (CFG).
+ * Information collected along this path is used later
+ * to determine whether there are any loops and/or unreachable instructions.
+ */
+static void
+dfs(struct bpf_verifier *bvf)
+{
+ struct inst_node *next, *node;
+
+ node = bvf->in;
+ while (node != NULL) {
+
+ if (node->colour == WHITE)
+ set_node_colour(bvf, node, GREY);
+
+ if (node->colour == GREY) {
+
+ /* find next unprocessed child node */
+ do {
+ next = get_next_node(bvf, node);
+ if (next == NULL)
+ break;
+ set_edge_type(bvf, node, next);
+ } while (next->colour != WHITE);
+
+ if (next != NULL) {
+ /* proceed with next child */
+ next->prev_node = get_node_idx(bvf, node);
+ node = next;
+ } else {
+ /*
+ * finished with the current node and all its children,
+ * proceed with parent
+ */
+ set_node_colour(bvf, node, BLACK);
+ node->cur_edge = 0;
+ node = get_prev_node(bvf, node);
+ }
+ } else
+ node = NULL;
+ }
+}
+
+/*
+ * report unreachable instructions.
+ */
+static void
+log_unreachable(const struct bpf_verifier *bvf)
+{
+ uint32_t i;
+ struct inst_node *node;
+ const struct ebpf_insn *ins;
+
+ for (i = 0; i != bvf->prm->nb_ins; i++) {
+
+ node = bvf->in + i;
+ ins = bvf->prm->ins + i;
+
+ if (node->colour == WHITE &&
+ ins->code != (BPF_LD | BPF_IMM | EBPF_DW))
+ RTE_BPF_LOG(ERR, "unreachable code at pc: %u;\n", i);
+ }
+}
+
+/*
+ * report loops detected.
+ */
+static void
+log_loop(const struct bpf_verifier *bvf)
+{
+ uint32_t i, j;
+ struct inst_node *node;
+
+ for (i = 0; i != bvf->prm->nb_ins; i++) {
+
+ node = bvf->in + i;
+ if (node->colour != BLACK)
+ continue;
+
+ for (j = 0; j != node->nb_edge; j++) {
+ if (node->edge_type[j] == BACK_EDGE)
+ RTE_BPF_LOG(ERR,
+ "loop at pc:%u --> pc:%u;\n",
+ i, node->edge_dest[j]);
+ }
+ }
+}
+
+/*
+ * The first pass goes through all instructions in the set, checks that each
+ * instruction is a valid one (correct syntax, valid field values, etc.)
+ * and constructs control flow graph (CFG).
+ * Then a depth-first search is performed over the constructed graph.
+ * Programs with unreachable instructions and/or loops will be rejected.
+ */
+static int
+validate(struct bpf_verifier *bvf)
+{
+ int32_t rc;
+ uint32_t i;
+ struct inst_node *node;
+ const struct ebpf_insn *ins;
+ const char *err;
+
+ rc = 0;
+ for (i = 0; i < bvf->prm->nb_ins; i++) {
+
+ ins = bvf->prm->ins + i;
+ node = bvf->in + i;
+
+ err = check_syntax(ins);
+ if (err != 0) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, i);
+ rc |= -EINVAL;
+ }
+
+ /*
+ * construct CFG: jcc nodes have two outgoing edges,
+ * 'exit' nodes have none, all other nodes have exactly one
+ * outgoing edge.
+ */
+ switch (ins->code) {
+ case (BPF_JMP | EBPF_EXIT):
+ break;
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ rc |= add_edge(bvf, node, i + ins->off + 1);
+ rc |= add_edge(bvf, node, i + 1);
+ bvf->nb_jcc_nodes++;
+ break;
+ case (BPF_JMP | BPF_JA):
+ rc |= add_edge(bvf, node, i + ins->off + 1);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ rc |= add_edge(bvf, node, i + 2);
+ i++;
+ break;
+ default:
+ rc |= add_edge(bvf, node, i + 1);
+ break;
+ }
+
+ bvf->nb_nodes++;
+ bvf->node_colour[WHITE]++;
+ }
+
+ if (rc != 0)
+ return rc;
+
+ dfs(bvf);
+
+ RTE_BPF_LOG(DEBUG, "%s(%p) stats:\n"
+ "nb_nodes=%u;\n"
+ "nb_jcc_nodes=%u;\n"
+ "node_color={[WHITE]=%u, [GREY]=%u, [BLACK]=%u};\n"
+ "edge_type={[UNKNOWN]=%u, [TREE]=%u, [BACK]=%u, [CROSS]=%u};\n",
+ __func__, bvf,
+ bvf->nb_nodes,
+ bvf->nb_jcc_nodes,
+ bvf->node_colour[WHITE], bvf->node_colour[GREY],
+ bvf->node_colour[BLACK],
+ bvf->edge_type[UNKNOWN_EDGE], bvf->edge_type[TREE_EDGE],
+ bvf->edge_type[BACK_EDGE], bvf->edge_type[CROSS_EDGE]);
+
+ if (bvf->node_colour[BLACK] != bvf->nb_nodes) {
+ RTE_BPF_LOG(ERR, "%s(%p) unreachable instructions;\n",
+ __func__, bvf);
+ log_unreachable(bvf);
+ return -EINVAL;
+ }
+
+ if (bvf->node_colour[GREY] != 0 || bvf->node_colour[WHITE] != 0 ||
+ bvf->edge_type[UNKNOWN_EDGE] != 0) {
+ RTE_BPF_LOG(ERR, "%s(%p) DFS internal error;\n",
+ __func__, bvf);
+ return -EINVAL;
+ }
+
+ if (bvf->edge_type[BACK_EDGE] != 0) {
+ RTE_BPF_LOG(ERR, "%s(%p) loops detected;\n",
+ __func__, bvf);
+ log_loop(bvf);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * helper functions get/free eval states.
+ */
+static struct bpf_eval_state *
+pull_eval_state(struct bpf_verifier *bvf)
+{
+ uint32_t n;
+
+ n = bvf->evst_pool.cur;
+ if (n == bvf->evst_pool.num)
+ return NULL;
+
+ bvf->evst_pool.cur = n + 1;
+ return bvf->evst_pool.ent + n;
+}
+
+static void
+push_eval_state(struct bpf_verifier *bvf)
+{
+ bvf->evst_pool.cur--;
+}
+
+static void
+evst_pool_fini(struct bpf_verifier *bvf)
+{
+ bvf->evst = NULL;
+ free(bvf->evst_pool.ent);
+ memset(&bvf->evst_pool, 0, sizeof(bvf->evst_pool));
+}
+
+static int
+evst_pool_init(struct bpf_verifier *bvf)
+{
+ uint32_t n;
+
+ n = bvf->nb_jcc_nodes + 1;
+
+ bvf->evst_pool.ent = calloc(n, sizeof(bvf->evst_pool.ent[0]));
+ if (bvf->evst_pool.ent == NULL)
+ return -ENOMEM;
+
+ bvf->evst_pool.num = n;
+ bvf->evst_pool.cur = 0;
+
+ bvf->evst = pull_eval_state(bvf);
+ return 0;
+}
+
+/*
+ * Save current eval state.
+ */
+static int
+save_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ struct bpf_eval_state *st;
+
+ /* get new eval_state for this node */
+ st = pull_eval_state(bvf);
+ if (st == NULL) {
+ RTE_BPF_LOG(ERR,
+ "%s: internal error (out of space) at pc: %u\n",
+ __func__, get_node_idx(bvf, node));
+ return -ENOMEM;
+ }
+
+ /* make a copy of current state */
+ memcpy(st, bvf->evst, sizeof(*st));
+
+ /* swap current state with new one */
+ node->evst = bvf->evst;
+ bvf->evst = st;
+
+ RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n",
+ __func__, bvf, get_node_idx(bvf, node), node->evst, bvf->evst);
+
+ return 0;
+}
+
+/*
+ * Restore previous eval state and mark current eval state as free.
+ */
+static void
+restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n",
+ __func__, bvf, get_node_idx(bvf, node), bvf->evst, node->evst);
+
+ bvf->evst = node->evst;
+ node->evst = NULL;
+ push_eval_state(bvf);
+}
+
+static void
+log_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins,
+ uint32_t pc, int32_t loglvl)
+{
+ const struct bpf_eval_state *st;
+ const struct bpf_reg_val *rv;
+
+ rte_log(loglvl, rte_bpf_logtype, "%s(pc=%u):\n", __func__, pc);
+
+ st = bvf->evst;
+ rv = st->rv + ins->dst_reg;
+
+ rte_log(loglvl, rte_bpf_logtype,
+ "r%u={\n"
+ "\tv={type=%u, size=%zu},\n"
+ "\tmask=0x%" PRIx64 ",\n"
+ "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n"
+ "\ts={min=%" PRId64 ", max=%" PRId64 "},\n"
+ "};\n",
+ ins->dst_reg,
+ rv->v.type, rv->v.size,
+ rv->mask,
+ rv->u.min, rv->u.max,
+ rv->s.min, rv->s.max);
+}
+
+/*
+ * Do second pass through CFG and try to evaluate instructions
+ * via each possible path.
+ * Right now evaluation functionality is quite limited.
+ * Still need to add extra checks for:
+ * - use/return of uninitialized registers.
+ * - use of uninitialized data from the stack.
+ * - memory boundary violations.
+ */
+static int
+evaluate(struct bpf_verifier *bvf)
+{
+ int32_t rc;
+ uint32_t idx, op;
+ const char *err;
+ const struct ebpf_insn *ins;
+ struct inst_node *next, *node;
+
+ /* initial state of frame pointer */
+ static const struct bpf_reg_val rvfp = {
+ .v = {
+ .type = RTE_BPF_ARG_PTR_STACK,
+ .size = MAX_BPF_STACK_SIZE,
+ },
+ .mask = UINT64_MAX,
+ .u = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ .s = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ };
+
+ bvf->evst->rv[EBPF_REG_1].v = bvf->prm->prog_arg;
+ bvf->evst->rv[EBPF_REG_1].mask = UINT64_MAX;
+ if (bvf->prm->prog_arg.type == RTE_BPF_ARG_RAW)
+ eval_max_bound(bvf->evst->rv + EBPF_REG_1, UINT64_MAX);
+
+ bvf->evst->rv[EBPF_REG_10] = rvfp;
+
+ ins = bvf->prm->ins;
+ node = bvf->in;
+ next = node;
+ rc = 0;
+
+ while (node != NULL && rc == 0) {
+
+ /*
+ * current node evaluation, make sure we evaluate
+ * each node only once.
+ */
+ if (next != NULL) {
+
+ bvf->evin = node;
+ idx = get_node_idx(bvf, node);
+ op = ins[idx].code;
+
+ /* for jcc nodes make a copy of the evaluation state */
+ if (node->nb_edge > 1)
+ rc |= save_eval_state(bvf, node);
+
+ if (ins_chk[op].eval != NULL && rc == 0) {
+ err = ins_chk[op].eval(bvf, ins + idx);
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, idx);
+ rc = -EINVAL;
+ }
+ }
+
+ log_eval_state(bvf, ins + idx, idx, RTE_LOG_DEBUG);
+ bvf->evin = NULL;
+ }
+
+ /* proceed through CFG */
+ next = get_next_node(bvf, node);
+ if (next != NULL) {
+
+ /* proceed with next child */
+ if (node->cur_edge == node->nb_edge &&
+ node->evst != NULL)
+ restore_eval_state(bvf, node);
+
+ next->prev_node = get_node_idx(bvf, node);
+ node = next;
+ } else {
+ /*
+ * finished with the current node and all its children,
+ * proceed with parent
+ */
+ node->cur_edge = 0;
+ node = get_prev_node(bvf, node);
+
+ /* finished */
+ if (node == bvf->in)
+ node = NULL;
+ }
+ }
+
+ return rc;
+}
+
+int
+bpf_validate(struct rte_bpf *bpf)
+{
+ int32_t rc;
+ struct bpf_verifier bvf;
+
+ /* check input argument type, don't allow mbuf ptr on 32-bit */
+ if (bpf->prm.prog_arg.type != RTE_BPF_ARG_RAW &&
+ bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR &&
+ (sizeof(uint64_t) != sizeof(uintptr_t) ||
+ bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR_MBUF)) {
+ RTE_BPF_LOG(ERR, "%s: unsupported argument type\n", __func__);
+ return -ENOTSUP;
+ }
+
+ memset(&bvf, 0, sizeof(bvf));
+ bvf.prm = &bpf->prm;
+ bvf.in = calloc(bpf->prm.nb_ins, sizeof(bvf.in[0]));
+ if (bvf.in == NULL)
+ return -ENOMEM;
+
+ rc = validate(&bvf);
+
+ if (rc == 0) {
+ rc = evst_pool_init(&bvf);
+ if (rc == 0)
+ rc = evaluate(&bvf);
+ evst_pool_fini(&bvf);
+ }
+
+ free(bvf.in);
+
+ /* copy collected info */
+ if (rc == 0)
+ bpf->stack_sz = bvf.stack_sz;
+
+ return rc;
+}
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
new file mode 100644
index 00000000..bc0cd78f
--- /dev/null
+++ b/lib/librte_bpf/meson.build
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('bpf.c',
+ 'bpf_exec.c',
+ 'bpf_load.c',
+ 'bpf_pkt.c',
+ 'bpf_validate.c')
+
+if arch_subdir == 'x86' and cc.sizeof('void *') == 8
+ sources += files('bpf_jit_x86.c')
+endif
+
+install_headers = files('bpf_def.h',
+ 'rte_bpf.h',
+ 'rte_bpf_ethdev.h')
+
+deps += ['mbuf', 'net', 'ethdev']
+
+dep = cc.find_library('elf', required: false)
+if dep.found() == true and cc.has_header('libelf.h', dependencies: dep)
+ sources += files('bpf_load_elf.c')
+ ext_deps += dep
+endif
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
new file mode 100644
index 00000000..ad62ef2c
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_H_
+#define _RTE_BPF_H_
+
+/**
+ * @file rte_bpf.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * RTE BPF support.
+ * librte_bpf provides a framework to load and execute eBPF bytecode
+ * inside user-space dpdk based applications.
+ * It supports a basic set of features from the eBPF spec
+ * (https://www.kernel.org/doc/Documentation/networking/filter.txt).
+ */
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <bpf_def.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Possible types for function/BPF program arguments.
+ */
+enum rte_bpf_arg_type {
+ RTE_BPF_ARG_UNDEF, /**< undefined */
+ RTE_BPF_ARG_RAW, /**< scalar value */
+ RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */
+ RTE_BPF_ARG_PTR_MBUF, /**< pointer to rte_mbuf */
+ RTE_BPF_ARG_PTR_STACK,
+};
+
+/**
+ * function argument information
+ */
+struct rte_bpf_arg {
+ enum rte_bpf_arg_type type;
+ /**
+ * for ptr type - max size of data buffer it points to
+ * for raw type - the size (in bytes) of the value
+ */
+ size_t size;
+ size_t buf_size;
+ /**< for mbuf ptr type, max size of rte_mbuf data buffer */
+};
+
+/**
+ * determine whether the argument is a pointer
+ */
+#define RTE_BPF_ARG_PTR_TYPE(x) ((x) & RTE_BPF_ARG_PTR)
+
+/**
+ * Possible types for external symbols.
+ */
+enum rte_bpf_xtype {
+ RTE_BPF_XTYPE_FUNC, /**< function */
+ RTE_BPF_XTYPE_VAR, /**< variable */
+ RTE_BPF_XTYPE_NUM
+};
+
+/**
+ * Definition for external symbols available in the BPF program.
+ */
+struct rte_bpf_xsym {
+ const char *name; /**< name */
+ enum rte_bpf_xtype type; /**< type */
+ union {
+ struct {
+ uint64_t (*val)(uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t);
+ uint32_t nb_args;
+ struct rte_bpf_arg args[EBPF_FUNC_MAX_ARGS];
+ /**< Function arguments descriptions. */
+ struct rte_bpf_arg ret; /**< function return value. */
+ } func;
+ struct {
+ void *val; /**< actual memory location */
+ struct rte_bpf_arg desc; /**< type, size, etc. */
+ } var; /**< external variable */
+ };
+};
+
+/**
+ * Input parameters for loading eBPF code.
+ */
+struct rte_bpf_prm {
+ const struct ebpf_insn *ins; /**< array of eBPF instructions */
+ uint32_t nb_ins; /**< number of instructions in ins */
+ const struct rte_bpf_xsym *xsym;
+ /**< array of external symbols that eBPF code is allowed to reference */
+ uint32_t nb_xsym; /**< number of elements in xsym */
+ struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */
+};
+
+/**
+ * Information about eBPF code compiled into a native ISA.
+ */
+struct rte_bpf_jit {
+ uint64_t (*func)(void *); /**< JIT-ed native code */
+ size_t sz; /**< size of JIT-ed code */
+};
+
+struct rte_bpf;
+
+/**
+ * De-allocate all memory used by this eBPF execution context.
+ *
+ * @param bpf
+ * BPF handle to destroy.
+ */
+void __rte_experimental
+rte_bpf_destroy(struct rte_bpf *bpf);
+
+/**
+ * Create a new eBPF execution context and load given BPF code into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf * __rte_experimental
+rte_bpf_load(const struct rte_bpf_prm *prm);
+
+/**
+ * Create a new eBPF execution context and load BPF code from given ELF
+ * file into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param fname
+ * Pathname of the ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf * __rte_experimental
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname);
+/**
+ * Execute given BPF bytecode.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * pointer to input context.
+ * @return
+ * BPF execution return value.
+ */
+uint64_t __rte_experimental
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx);
+
+/**
+ * Execute given BPF bytecode over a set of input contexts.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * array of pointers to the input contexts.
+ * @param rc
+ * array of return values (one per input).
+ * @param num
+ * number of elements in ctx[] (and rc[]).
+ * @return
+ * number of successfully processed inputs.
+ */
+uint32_t __rte_experimental
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+ uint32_t num);
+
+/**
+ * Provide information about natively compiled code for the given BPF handle.
+ *
+ * @param bpf
+ * handle for the BPF code.
+ * @param jit
+ * pointer to the rte_bpf_jit structure to be filled with related data.
+ * @return
+ * - -EINVAL if the parameters are invalid.
+ * - Zero if operation completed successfully.
+ */
+int __rte_experimental
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_H_ */
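A minimal usage sketch of the API declared above, assuming a hypothetical ELF object "filter.o" with its code in the ".text" section and a caller-defined pkt_ctx input structure (illustrative only, not part of the patch):

#include <stdio.h>
#include <rte_bpf.h>
#include <rte_errno.h>

/* Hypothetical input context handed to the filter as a raw data buffer. */
struct pkt_ctx {
	uint32_t len;
	uint8_t data[64];
};

static struct rte_bpf *
load_filter(void)
{
	struct rte_bpf_prm prm = {
		/* instructions come from the ELF file, not from prm.ins */
		.xsym = NULL,
		.nb_xsym = 0,
		.prog_arg = {
			.type = RTE_BPF_ARG_PTR,
			.size = sizeof(struct pkt_ctx),
		},
	};
	struct rte_bpf *bpf;

	bpf = rte_bpf_elf_load(&prm, "filter.o", ".text");
	if (bpf == NULL)
		printf("rte_bpf_elf_load failed: rte_errno=%d\n", rte_errno);
	return bpf;
}

static uint64_t
run_filter(struct rte_bpf *bpf, struct pkt_ctx *ctx)
{
	struct rte_bpf_jit jit;

	/* Prefer the JIT-ed version when one was generated for this target. */
	if (rte_bpf_get_jit(bpf, &jit) == 0 && jit.func != NULL)
		return jit.func(ctx);
	return rte_bpf_exec(bpf, ctx);
}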
diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h
new file mode 100644
index 00000000..31731e7a
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_ethdev.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_ETHDEV_H_
+#define _RTE_BPF_ETHDEV_H_
+
+/**
+ * @file rte_bpf_ethdev.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * API to install BPF filter as RX/TX callbacks for eth devices.
+ * Note that right now:
+ * - it is not MT safe, i.e. it is not allowed to do load/unload for the
+ * same port/queue from different threads in parallel.
+ * - though it allows load/unload at runtime
+ * (while RX/TX is ongoing on given port/queue).
+ * - it allows only one BPF program per port/queue,
+ * i.e. a new load will replace the previously loaded BPF program
+ * for that port/queue.
+ * Filter behaviour - if the BPF program returns zero for a given packet:
+ * on RX - the packet will be dropped inside the callback and no further
+ * processing for that packet will happen;
+ * on TX - the packet will remain unsent, and it is the responsibility of
+ * the user to handle such a situation (drop, try to send again, etc.).
+ */
+
+#include <rte_bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ RTE_BPF_ETH_F_NONE = 0,
+ RTE_BPF_ETH_F_JIT = 0x1, /**< use code compiled into native ISA */
+};
+
+/**
+ * Unload previously loaded BPF program (if any) from given RX port/queue
+ * and remove appropriate RX port/queue callback.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the RX queue on the given port
+ */
+void __rte_experimental
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
+
+/**
+ * Unload previously loaded BPF program (if any) from given TX port/queue
+ * and remove appropriate TX port/queue callback.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the TX queue on the given port
+ */
+void __rte_experimental
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
+
+/**
+ * Load BPF program from the ELF file and install callback to execute it
+ * on given RX port/queue.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the RX queue on the given port
+ * @param fname
+ * Pathname for an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param flags
+ * Flags that define the expected behavior of the loaded filter
+ * (i.e. jited/non-jited version to use).
+ * @return
+ * Zero on successful completion or negative error code otherwise.
+ */
+int __rte_experimental
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
+
+/**
+ * Load BPF program from the ELF file and install callback to execute it
+ * on given TX port/queue.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the TX queue on the given port
+ * @param fname
+ * Pathname for an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param flags
+ * Flags that define the expected behavior of the loaded filter
+ * (i.e. jited/non-jited version to use).
+ * @return
+ * Zero on successful completion or negative error code otherwise.
+ */
+int __rte_experimental
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_ETHDEV_H_ */
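A short sketch of installing and removing an RX filter with this API, assuming a hypothetical "filter.o"/".text" program that takes a pointer to the packet mbuf as its argument (illustrative only):

#include <rte_bpf_ethdev.h>
#include <rte_mbuf.h>

static int
install_rx_filter(uint16_t port)
{
	struct rte_bpf_prm prm = {
		.prog_arg = {
			.type = RTE_BPF_ARG_PTR_MBUF,
			.size = sizeof(struct rte_mbuf),
			.buf_size = RTE_MBUF_DEFAULT_BUF_SIZE,
		},
	};

	/* queue 0, JIT-ed version requested */
	return rte_bpf_eth_rx_elf_load(port, 0, &prm, "filter.o", ".text",
			RTE_BPF_ETH_F_JIT);
}

static void
remove_rx_filter(uint16_t port)
{
	rte_bpf_eth_rx_unload(port, 0);
}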
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
new file mode 100644
index 00000000..a203e088
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -0,0 +1,16 @@
+EXPERIMENTAL {
+ global:
+
+ rte_bpf_destroy;
+ rte_bpf_elf_load;
+ rte_bpf_eth_rx_elf_load;
+ rte_bpf_eth_rx_unload;
+ rte_bpf_eth_tx_elf_load;
+ rte_bpf_eth_tx_unload;
+ rte_bpf_exec;
+ rte_bpf_exec_burst;
+ rte_bpf_get_jit;
+ rte_bpf_load;
+
+ local: *;
+};
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index e88e4e11..9666e90c 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -208,9 +208,6 @@ cmdline_parse(struct cmdline *cl, const char * buf)
int err = CMDLINE_PARSE_NOMATCH;
int tok;
cmdline_parse_ctx_t *ctx;
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- char debug_buf[BUFSIZ];
-#endif
char *result_buf = result.buf;
if (!cl || !buf)
@@ -250,10 +247,8 @@ cmdline_parse(struct cmdline *cl, const char * buf)
return linelen;
}
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- snprintf(debug_buf, (linelen>64 ? 64 : linelen), "%s", buf);
- debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf);
-#endif
+ debug_printf("Parse line : len=%d, <%.*s>\n",
+ linelen, linelen > 64 ? 64 : linelen, buf);
/* parse it !! */
inst = ctx[inst_num];
@@ -436,7 +431,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
if ((unsigned)(comp_len + 1) > size)
return 0;
- snprintf(dst, size, "%s", comp_buf);
+ strlcpy(dst, comp_buf, size);
dst[comp_len] = 0;
return 2;
}
@@ -513,7 +508,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
continue;
}
(*state)++;
- l=snprintf(dst, size, "%s", tmpbuf);
+ l=strlcpy(dst, tmpbuf, size);
if (l>=0 && token_hdr.ops->get_help) {
token_hdr.ops->get_help(token_p, tmpbuf,
sizeof(tmpbuf));
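The snprintf()-to-strlcpy() conversions above are mechanical; both forms copy at most size-1 bytes and always NUL-terminate. A sketch of the two patterns side by side (strlcpy is supplied by <rte_string_fns.h> when the C library lacks it; illustrative only):

#include <stdio.h>
#include <rte_string_fns.h>	/* provides strlcpy when libc does not */

static void
copy_token(char *dst, size_t size, const char *src)
{
	/* old pattern: truncating copy, but routed through printf formatting */
	snprintf(dst, size, "%s", src);

	/* new pattern: same truncation semantics, no format parsing */
	strlcpy(dst, src, size);
}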
diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c
index 8d281192..24e04755 100644
--- a/lib/librte_cmdline/cmdline_parse_etheraddr.c
+++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c
@@ -102,7 +102,7 @@ cmdline_parse_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
(token_len != ETHER_ADDRSTRLENSHORT - 1))
return -1;
- snprintf(ether_str, token_len+1, "%s", buf);
+ strlcpy(ether_str, buf, token_len + 1);
tmp = my_ether_aton(ether_str);
if (tmp == NULL)
diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.c b/lib/librte_cmdline/cmdline_parse_ipaddr.c
index ae6ea100..6647f569 100644
--- a/lib/librte_cmdline/cmdline_parse_ipaddr.c
+++ b/lib/librte_cmdline/cmdline_parse_ipaddr.c
@@ -4,26 +4,6 @@
* All rights reserved.
*/
-/*
- * For inet_ntop() functions:
- *
- * Copyright (c) 1996 by Internet Software Consortium.
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
- * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
- * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
@@ -31,6 +11,7 @@
#include <ctype.h>
#include <string.h>
#include <errno.h>
+#include <arpa/inet.h>
#include <netinet/in.h>
#ifndef __linux__
#ifndef __FreeBSD__
@@ -52,205 +33,9 @@ struct cmdline_token_ops cmdline_token_ipaddr_ops = {
.get_help = cmdline_get_help_ipaddr,
};
-#define INADDRSZ 4
-#define IN6ADDRSZ 16
#define PREFIXMAX 128
#define V4PREFIXMAX 32
-/*
- * WARNING: Don't even consider trying to compile this on a system where
- * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
- */
-
-static int inet_pton4(const char *src, unsigned char *dst);
-static int inet_pton6(const char *src, unsigned char *dst);
-
-/* int
- * inet_pton(af, src, dst)
- * convert from presentation format (which usually means ASCII printable)
- * to network format (which is usually some kind of binary format).
- * return:
- * 1 if the address was valid for the specified address family
- * 0 if the address wasn't valid (`dst' is untouched in this case)
- * -1 if some other error occurred (`dst' is untouched in this case, too)
- * author:
- * Paul Vixie, 1996.
- */
-static int
-my_inet_pton(int af, const char *src, void *dst)
-{
- switch (af) {
- case AF_INET:
- return inet_pton4(src, dst);
- case AF_INET6:
- return inet_pton6(src, dst);
- default:
- errno = EAFNOSUPPORT;
- return -1;
- }
- /* NOTREACHED */
-}
-
-/* int
- * inet_pton4(src, dst)
- * like inet_aton() but without all the hexadecimal and shorthand.
- * return:
- * 1 if `src' is a valid dotted quad, else 0.
- * notice:
- * does not touch `dst' unless it's returning 1.
- * author:
- * Paul Vixie, 1996.
- */
-static int
-inet_pton4(const char *src, unsigned char *dst)
-{
- static const char digits[] = "0123456789";
- int saw_digit, octets, ch;
- unsigned char tmp[INADDRSZ], *tp;
-
- saw_digit = 0;
- octets = 0;
- *(tp = tmp) = 0;
- while ((ch = *src++) != '\0') {
- const char *pch;
-
- if ((pch = strchr(digits, ch)) != NULL) {
- unsigned int new = *tp * 10 + (pch - digits);
-
- if (new > 255)
- return 0;
- if (! saw_digit) {
- if (++octets > 4)
- return 0;
- saw_digit = 1;
- }
- *tp = (unsigned char)new;
- } else if (ch == '.' && saw_digit) {
- if (octets == 4)
- return 0;
- *++tp = 0;
- saw_digit = 0;
- } else
- return 0;
- }
- if (octets < 4)
- return 0;
-
- memcpy(dst, tmp, INADDRSZ);
- return 1;
-}
-
-/* int
- * inet_pton6(src, dst)
- * convert presentation level address to network order binary form.
- * return:
- * 1 if `src' is a valid [RFC1884 2.2] address, else 0.
- * notice:
- * (1) does not touch `dst' unless it's returning 1.
- * (2) :: in a full address is silently ignored.
- * credit:
- * inspired by Mark Andrews.
- * author:
- * Paul Vixie, 1996.
- */
-static int
-inet_pton6(const char *src, unsigned char *dst)
-{
- static const char xdigits_l[] = "0123456789abcdef",
- xdigits_u[] = "0123456789ABCDEF";
- unsigned char tmp[IN6ADDRSZ], *tp = 0, *endp = 0, *colonp = 0;
- const char *xdigits = 0, *curtok = 0;
- int ch = 0, saw_xdigit = 0, count_xdigit = 0;
- unsigned int val = 0;
- unsigned dbloct_count = 0;
-
- memset((tp = tmp), '\0', IN6ADDRSZ);
- endp = tp + IN6ADDRSZ;
- colonp = NULL;
- /* Leading :: requires some special handling. */
- if (*src == ':')
- if (*++src != ':')
- return 0;
- curtok = src;
- saw_xdigit = count_xdigit = 0;
- val = 0;
-
- while ((ch = *src++) != '\0') {
- const char *pch;
-
- if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
- pch = strchr((xdigits = xdigits_u), ch);
- if (pch != NULL) {
- if (count_xdigit >= 4)
- return 0;
- val <<= 4;
- val |= (pch - xdigits);
- if (val > 0xffff)
- return 0;
- saw_xdigit = 1;
- count_xdigit++;
- continue;
- }
- if (ch == ':') {
- curtok = src;
- if (!saw_xdigit) {
- if (colonp)
- return 0;
- colonp = tp;
- continue;
- } else if (*src == '\0') {
- return 0;
- }
- if (tp + sizeof(int16_t) > endp)
- return 0;
- *tp++ = (unsigned char) ((val >> 8) & 0xff);
- *tp++ = (unsigned char) (val & 0xff);
- saw_xdigit = 0;
- count_xdigit = 0;
- val = 0;
- dbloct_count++;
- continue;
- }
- if (ch == '.' && ((tp + INADDRSZ) <= endp) &&
- inet_pton4(curtok, tp) > 0) {
- tp += INADDRSZ;
- saw_xdigit = 0;
- dbloct_count += 2;
- break; /* '\0' was seen by inet_pton4(). */
- }
- return 0;
- }
- if (saw_xdigit) {
- if (tp + sizeof(int16_t) > endp)
- return 0;
- *tp++ = (unsigned char) ((val >> 8) & 0xff);
- *tp++ = (unsigned char) (val & 0xff);
- dbloct_count++;
- }
- if (colonp != NULL) {
- /* if we already have 8 double octets, having a colon means error */
- if (dbloct_count == 8)
- return 0;
-
- /*
- * Since some memmove()'s erroneously fail to handle
- * overlapping regions, we'll do the shift by hand.
- */
- const int n = tp - colonp;
- int i;
-
- for (i = 1; i <= n; i++) {
- endp[- i] = colonp[n - i];
- colonp[n - i] = 0;
- }
- tp = endp;
- }
- if (tp != endp)
- return 0;
- memcpy(dst, tmp, IN6ADDRSZ);
- return 1;
-}
-
int
cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
unsigned ressize)
@@ -277,7 +62,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
if (token_len >= INET6_ADDRSTRLEN+4)
return -1;
- snprintf(ip_str, token_len+1, "%s", buf);
+ strlcpy(ip_str, buf, token_len + 1);
/* convert the network prefix */
if (tk2->ipaddr_data.flags & CMDLINE_IPADDR_NETWORK) {
@@ -299,7 +84,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
/* convert the IP addr */
if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V4) &&
- my_inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 &&
+ inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 &&
prefixlen <= V4PREFIXMAX) {
ipaddr.family = AF_INET;
if (res)
@@ -307,7 +92,7 @@ cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
return token_len;
}
if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V6) &&
- my_inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) {
+ inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) {
ipaddr.family = AF_INET6;
if (res)
memcpy(res, &ipaddr, sizeof(ipaddr));
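With the local my_inet_pton() removed, the parser relies on the libc inet_pton() declared in <arpa/inet.h>, which follows the same return-value contract. A small sketch using example addresses (illustrative only):

#include <arpa/inet.h>
#include <netinet/in.h>

/* inet_pton() keeps the contract the removed my_inet_pton() implemented:
 * 1 = parsed, 0 = malformed address, -1 = unsupported address family.
 */
static int
parse_any_addr(const char *s, struct in_addr *v4, struct in6_addr *v6)
{
	if (inet_pton(AF_INET, s, v4) == 1)
		return 4;	/* e.g. "192.0.2.1" */
	if (inet_pton(AF_INET6, s, v6) == 1)
		return 6;	/* e.g. "2001:db8::1" */
	return -1;
}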
diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c
index 5952f343..ad43b522 100644
--- a/lib/librte_cmdline/cmdline_parse_portlist.c
+++ b/lib/librte_cmdline/cmdline_parse_portlist.c
@@ -94,7 +94,7 @@ cmdline_parse_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
if (token_len >= PORTLIST_TOKEN_SIZE)
return -1;
- snprintf(portlist_str, token_len+1, "%s", buf);
+ strlcpy(portlist_str, buf, token_len + 1);
if (pl) {
pl->map = 0;
diff --git a/lib/librte_cmdline/cmdline_parse_string.c b/lib/librte_cmdline/cmdline_parse_string.c
index abde0412..9cf41d0f 100644
--- a/lib/librte_cmdline/cmdline_parse_string.c
+++ b/lib/librte_cmdline/cmdline_parse_string.c
@@ -125,10 +125,10 @@ cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
if (res) {
if ((sd->str != NULL) && (strcmp(sd->str, TOKEN_STRING_MULTI) == 0))
/* we are sure that token_len is < STR_MULTI_TOKEN_SIZE-1 */
- snprintf(res, STR_MULTI_TOKEN_SIZE, "%s", buf);
+ strlcpy(res, buf, STR_MULTI_TOKEN_SIZE);
else
/* we are sure that token_len is < STR_TOKEN_SIZE-1 */
- snprintf(res, STR_TOKEN_SIZE, "%s", buf);
+ strlcpy(res, buf, STR_TOKEN_SIZE);
*((char *)res + token_len) = 0;
}
diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile
index 0c57533c..61089fe7 100644
--- a/lib/librte_compat/Makefile
+++ b/lib/librte_compat/Makefile
@@ -1,33 +1,6 @@
-# BSD LICENSE
-#
-# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
+# All rights reserved.
include $(RTE_SDK)/mk/rte.vars.mk
diff --git a/lib/librte_compressdev/Makefile b/lib/librte_compressdev/Makefile
new file mode 100644
index 00000000..7ef89e61
--- /dev/null
+++ b/lib/librte_compressdev/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017-2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_compressdev.a
+
+# library version
+LIBABIVER := 1
+
+# build flags
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_eal -lrte_mempool -lrte_kvargs
+
+# library source files
+SRCS-y += rte_compressdev.c rte_compressdev_pmd.c rte_comp.c
+
+# export include files
+SYMLINK-y-include += rte_comp.h
+SYMLINK-y-include += rte_compressdev.h
+# export include files (for PMDs)
+SYMLINK-y-include += rte_compressdev_pmd.h
+SYMLINK-y-include += rte_compressdev_internal.h
+
+# versioning export map
+EXPORT_MAP := rte_compressdev_version.map
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_compressdev/meson.build b/lib/librte_compressdev/meson.build
new file mode 100644
index 00000000..5416571c
--- /dev/null
+++ b/lib/librte_compressdev/meson.build
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('rte_compressdev.c',
+ 'rte_compressdev_pmd.c',
+ 'rte_comp.c')
+headers = files('rte_compressdev.h',
+ 'rte_compressdev_pmd.h',
+ 'rte_compressdev_internal.h',
+ 'rte_comp.h')
+deps += ['kvargs', 'mbuf']
diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c
new file mode 100644
index 00000000..98ad0cfd
--- /dev/null
+++ b/lib/librte_compressdev/rte_comp.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include "rte_comp.h"
+#include "rte_compressdev.h"
+#include "rte_compressdev_internal.h"
+
+const char * __rte_experimental
+rte_comp_get_feature_name(uint64_t flag)
+{
+ switch (flag) {
+ case RTE_COMP_FF_STATEFUL_COMPRESSION:
+ return "STATEFUL_COMPRESSION";
+ case RTE_COMP_FF_STATEFUL_DECOMPRESSION:
+ return "STATEFUL_DECOMPRESSION";
+ case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_COMP_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_COMP_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
+ case RTE_COMP_FF_MULTI_PKT_CHECKSUM:
+ return "MULTI_PKT_CHECKSUM";
+ case RTE_COMP_FF_ADLER32_CHECKSUM:
+ return "ADLER32_CHECKSUM";
+ case RTE_COMP_FF_CRC32_CHECKSUM:
+ return "CRC32_CHECKSUM";
+ case RTE_COMP_FF_CRC32_ADLER32_CHECKSUM:
+ return "CRC32_ADLER32_CHECKSUM";
+ case RTE_COMP_FF_NONCOMPRESSED_BLOCKS:
+ return "NONCOMPRESSED_BLOCKS";
+ case RTE_COMP_FF_SHA1_HASH:
+ return "SHA1_HASH";
+ case RTE_COMP_FF_SHA2_SHA256_HASH:
+ return "SHA2_SHA256_HASH";
+ case RTE_COMP_FF_SHAREABLE_PRIV_XFORM:
+ return "SHAREABLE_PRIV_XFORM";
+ case RTE_COMP_FF_HUFFMAN_FIXED:
+ return "HUFFMAN_FIXED";
+ case RTE_COMP_FF_HUFFMAN_DYNAMIC:
+ return "HUFFMAN_DYNAMIC";
+ default:
+ return NULL;
+ }
+}
+
+/**
+ * Reset the fields of an operation to their default values.
+ *
+ * @note The private data associated with the operation is not zeroed.
+ *
+ * @param op
+ * The operation to be reset
+ */
+static inline void
+rte_comp_op_reset(struct rte_comp_op *op)
+{
+ struct rte_mempool *tmp_mp = op->mempool;
+ rte_iova_t tmp_iova_addr = op->iova_addr;
+
+ memset(op, 0, sizeof(struct rte_comp_op));
+ op->status = RTE_COMP_OP_STATUS_NOT_PROCESSED;
+ op->iova_addr = tmp_iova_addr;
+ op->mempool = tmp_mp;
+}
+
+/**
+ * Private data structure belonging to an operation pool.
+ */
+struct rte_comp_op_pool_private {
+ uint16_t user_size;
+ /**< Size of private user data with each operation. */
+};
+
+/**
+ * Bulk allocate raw elements from mempool and return them as comp operations
+ *
+ * @param mempool
+ * Compress operation mempool
+ * @param ops
+ * Array to place allocated operations
+ * @param nb_ops
+ * Number of operations to allocate
+ * @return
+ * - nb_ops: Success; all requested operations were allocated
+ * - 0: Not enough entries in the mempool; no ops are retrieved.
+ */
+static inline int
+rte_comp_op_raw_bulk_alloc(struct rte_mempool *mempool,
+ struct rte_comp_op **ops, uint16_t nb_ops)
+{
+ if (rte_mempool_get_bulk(mempool, (void **)ops, nb_ops) == 0)
+ return nb_ops;
+
+ return 0;
+}
+
+/** Initialise rte_comp_op mempool element */
+static void
+rte_comp_op_init(struct rte_mempool *mempool,
+ __rte_unused void *opaque_arg,
+ void *_op_data,
+ __rte_unused unsigned int i)
+{
+ struct rte_comp_op *op = _op_data;
+
+ memset(_op_data, 0, mempool->elt_size);
+
+ op->status = RTE_COMP_OP_STATUS_NOT_PROCESSED;
+ op->iova_addr = rte_mem_virt2iova(_op_data);
+ op->mempool = mempool;
+}
+
+struct rte_mempool * __rte_experimental
+rte_comp_op_pool_create(const char *name,
+ unsigned int nb_elts, unsigned int cache_size,
+ uint16_t user_size, int socket_id)
+{
+ struct rte_comp_op_pool_private *priv;
+
+ unsigned int elt_size = sizeof(struct rte_comp_op) + user_size;
+
+ /* lookup mempool in case already allocated */
+ struct rte_mempool *mp = rte_mempool_lookup(name);
+
+ if (mp != NULL) {
+ priv = (struct rte_comp_op_pool_private *)
+ rte_mempool_get_priv(mp);
+
+ if (mp->elt_size != elt_size ||
+ mp->cache_size < cache_size ||
+ mp->size < nb_elts ||
+ priv->user_size < user_size) {
+ mp = NULL;
+ COMPRESSDEV_LOG(ERR,
+ "Mempool %s already exists but with incompatible parameters",
+ name);
+ return NULL;
+ }
+ return mp;
+ }
+
+ mp = rte_mempool_create(
+ name,
+ nb_elts,
+ elt_size,
+ cache_size,
+ sizeof(struct rte_comp_op_pool_private),
+ NULL,
+ NULL,
+ rte_comp_op_init,
+ NULL,
+ socket_id,
+ 0);
+
+ if (mp == NULL) {
+ COMPRESSDEV_LOG(ERR, "Failed to create mempool %s", name);
+ return NULL;
+ }
+
+ priv = (struct rte_comp_op_pool_private *)
+ rte_mempool_get_priv(mp);
+
+ priv->user_size = user_size;
+
+ return mp;
+}
+
+struct rte_comp_op * __rte_experimental
+rte_comp_op_alloc(struct rte_mempool *mempool)
+{
+ struct rte_comp_op *op = NULL;
+ int retval;
+
+ retval = rte_comp_op_raw_bulk_alloc(mempool, &op, 1);
+ if (unlikely(retval < 0))
+ return NULL;
+
+ rte_comp_op_reset(op);
+
+ return op;
+}
+
+int __rte_experimental
+rte_comp_op_bulk_alloc(struct rte_mempool *mempool,
+ struct rte_comp_op **ops, uint16_t nb_ops)
+{
+ int ret;
+ uint16_t i;
+
+ ret = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops);
+ if (unlikely(ret < nb_ops))
+ return ret;
+
+ for (i = 0; i < nb_ops; i++)
+ rte_comp_op_reset(ops[i]);
+
+ return nb_ops;
+}
+
+/**
+ * Free operation structure.
+ * If the operation has been allocated from a rte_mempool, then the operation
+ * will be returned to the mempool.
+ *
+ * @param op
+ * Compress operation
+ */
+void __rte_experimental
+rte_comp_op_free(struct rte_comp_op *op)
+{
+ if (op != NULL && op->mempool != NULL)
+ rte_mempool_put(op->mempool, op);
+}
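A brief usage sketch for the pool helpers defined above (pool name, sizes and socket are illustrative values only):

#include <rte_comp.h>
#include <rte_lcore.h>

static struct rte_mempool *op_pool;

static int
comp_ops_init(void)
{
	op_pool = rte_comp_op_pool_create("comp_op_pool",
			8192,			/* number of operations */
			128,			/* per-lcore cache */
			0,			/* no per-op private user data */
			rte_socket_id());
	return op_pool == NULL ? -1 : 0;
}

static void
comp_op_roundtrip(void)
{
	struct rte_comp_op *op = rte_comp_op_alloc(op_pool);

	if (op == NULL)
		return;
	/* ... fill in m_src/m_dst and the xform handle, enqueue to a device ... */
	rte_comp_op_free(op);
}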
diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h
new file mode 100644
index 00000000..ee9056ea
--- /dev/null
+++ b/lib/librte_compressdev/rte_comp.h
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef _RTE_COMP_H_
+#define _RTE_COMP_H_
+
+/**
+ * @file rte_comp.h
+ *
+ * RTE definitions for Data Compression Service
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+/**
+ * compression service feature flags
+ *
+ * @note New features flags should be added to the end of the list
+ *
+ * Keep these flags synchronised with rte_comp_get_feature_name()
+ */
+#define RTE_COMP_FF_STATEFUL_COMPRESSION (1ULL << 0)
+/**< Stateful compression is supported */
+#define RTE_COMP_FF_STATEFUL_DECOMPRESSION (1ULL << 1)
+/**< Stateful decompression is supported */
+#define RTE_COMP_FF_OOP_SGL_IN_SGL_OUT (1ULL << 2)
+/**< Out-of-place Scatter-gather (SGL) buffers,
+ * with multiple segments, are supported in input and output
+ */
+#define RTE_COMP_FF_OOP_SGL_IN_LB_OUT (1ULL << 3)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment, in output
+ */
+#define RTE_COMP_FF_OOP_LB_IN_SGL_OUT (1ULL << 4)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 5)
+/**< Adler-32 Checksum is supported */
+#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 6)
+/**< CRC32 Checksum is supported */
+#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 7)
+/**< Adler-32/CRC32 Checksum is supported */
+#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 8)
+/**< Generation of checksum across multiple stateless packets is supported */
+#define RTE_COMP_FF_SHA1_HASH (1ULL << 9)
+/**< SHA1 Hash is supported */
+#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 10)
+/**< SHA256 Hash of SHA2 family is supported */
+#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 11)
+/**< Creation of non-compressed blocks using RTE_COMP_LEVEL_NONE is supported */
+#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 12)
+/**< Private xforms created by the PMD can be shared
+ * across multiple stateless operations. If not set, then app needs
+ * to create as many priv_xforms as it expects to have stateless
+ * operations in-flight.
+ */
+#define RTE_COMP_FF_HUFFMAN_FIXED (1ULL << 13)
+/**< Fixed huffman encoding is supported */
+#define RTE_COMP_FF_HUFFMAN_DYNAMIC (1ULL << 14)
+/**< Dynamic huffman encoding is supported */
+
+/** Status of comp operation */
+enum rte_comp_op_status {
+ RTE_COMP_OP_STATUS_SUCCESS = 0,
+ /**< Operation completed successfully */
+ RTE_COMP_OP_STATUS_NOT_PROCESSED,
+ /**< Operation has not yet been processed by the device */
+ RTE_COMP_OP_STATUS_INVALID_ARGS,
+ /**< Operation failed due to invalid arguments in request */
+ RTE_COMP_OP_STATUS_ERROR,
+ /**< Error handling operation */
+ RTE_COMP_OP_STATUS_INVALID_STATE,
+ /**< Operation is invoked in invalid state */
+ RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED,
+ /**< Output buffer ran out of space before operation completed.
+ * Error case. Application must resubmit all data with a larger
+ * output buffer.
+ */
+ RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE,
+ /**< Output buffer ran out of space before operation completed, but this
+ * is not an error case. Output data up to op.produced can be used and
+ * next op in the stream should continue on from op.consumed+1.
+ */
+};
+
+/** Compression Algorithms */
+enum rte_comp_algorithm {
+ RTE_COMP_ALGO_UNSPECIFIED = 0,
+ /**< No Compression algorithm */
+ RTE_COMP_ALGO_NULL,
+ /**< No compression.
+ * Pass-through, data is copied unchanged from source buffer to
+ * destination buffer.
+ */
+ RTE_COMP_ALGO_DEFLATE,
+ /**< DEFLATE compression algorithm
+ * https://tools.ietf.org/html/rfc1951
+ */
+ RTE_COMP_ALGO_LZS,
+ /**< LZS compression algorithm
+ * https://tools.ietf.org/html/rfc2395
+ */
+ RTE_COMP_ALGO_LIST_END
+};
+
+/** Compression Hash Algorithms */
+enum rte_comp_hash_algorithm {
+ RTE_COMP_HASH_ALGO_NONE = 0,
+ /**< No hash */
+ RTE_COMP_HASH_ALGO_SHA1,
+ /**< SHA1 hash algorithm */
+ RTE_COMP_HASH_ALGO_SHA2_256,
+ /**< SHA256 hash algorithm of SHA2 family */
+ RTE_COMP_HASH_ALGO_LIST_END
+};
+
+/**
+ * Compression Level.
+ * The number is interpreted by each PMD differently. However, lower numbers
+ * give the fastest compression, at the expense of compression ratio, while
+ * higher numbers may give better compression ratios but are likely slower.
+ */
+#define RTE_COMP_LEVEL_PMD_DEFAULT (-1)
+/** Use PMD Default */
+#define RTE_COMP_LEVEL_NONE (0)
+/** Output uncompressed blocks if supported by the specified algorithm */
+#define RTE_COMP_LEVEL_MIN (1)
+/** Use minimum compression level supported by the PMD */
+#define RTE_COMP_LEVEL_MAX (9)
+/** Use maximum compression level supported by the PMD */
+
+/** Compression checksum types */
+enum rte_comp_checksum_type {
+ RTE_COMP_CHECKSUM_NONE,
+ /**< No checksum generated */
+ RTE_COMP_CHECKSUM_CRC32,
+ /**< Generates a CRC32 checksum, as used by gzip */
+ RTE_COMP_CHECKSUM_ADLER32,
+ /**< Generates an Adler-32 checksum, as used by zlib */
+ RTE_COMP_CHECKSUM_CRC32_ADLER32,
+ /**< Generates both Adler-32 and CRC32 checksums, concatenated.
+ * CRC32 is in the lower 32bits, Adler-32 in the upper 32 bits.
+ */
+};
+
+
+/** Compression Huffman Type - used by DEFLATE algorithm */
+enum rte_comp_huffman {
+ RTE_COMP_HUFFMAN_DEFAULT,
+ /**< PMD may choose which Huffman codes to use */
+ RTE_COMP_HUFFMAN_FIXED,
+ /**< Use Fixed Huffman codes */
+ RTE_COMP_HUFFMAN_DYNAMIC,
+ /**< Use Dynamic Huffman codes */
+};
+
+/** Compression flush flags */
+enum rte_comp_flush_flag {
+ RTE_COMP_FLUSH_NONE,
+ /**< Data is not flushed. Output may remain in the compressor and be
+ * processed during a following op. It may not be possible to decompress
+ * output until a later op with some other flush flag has been sent.
+ */
+ RTE_COMP_FLUSH_SYNC,
+ /**< All data should be flushed to output buffer. Output data can be
+ * decompressed. However state and history is not cleared, so future
+ * operations may use history from this operation.
+ */
+ RTE_COMP_FLUSH_FULL,
+ /**< All data should be flushed to output buffer. Output data can be
+ * decompressed. State and history data is cleared, so future
+ * ops will be independent of ops processed before this.
+ */
+ RTE_COMP_FLUSH_FINAL
+ /**< Same as RTE_COMP_FLUSH_FULL but if op.algo is RTE_COMP_ALGO_DEFLATE
+ * then bfinal bit is set in the last block.
+ */
+};
+
+/** Compression transform types */
+enum rte_comp_xform_type {
+ RTE_COMP_COMPRESS,
+ /**< Compression service - compress */
+ RTE_COMP_DECOMPRESS,
+ /**< Compression service - decompress */
+};
+
+/** Compression operation type */
+enum rte_comp_op_type {
+ RTE_COMP_OP_STATELESS,
+ /**< All data to be processed is submitted in the op, no state or
+ * history from previous ops is used and none will be stored for future
+ * ops. Flush flag must be set to either FLUSH_FULL or FLUSH_FINAL.
+ */
+ RTE_COMP_OP_STATEFUL
+ /**< There may be more data to be processed after this op, it's part of
+ * a stream of data. State and history from previous ops can be used
+ * and resulting state and history can be stored for future ops,
+ * depending on flush flag.
+ */
+};
+
+
+/** Parameters specific to the deflate algorithm */
+struct rte_comp_deflate_params {
+ enum rte_comp_huffman huffman;
+ /**< Compression huffman encoding type */
+};
+
+/** Setup Data for compression */
+struct rte_comp_compress_xform {
+ enum rte_comp_algorithm algo;
+ /**< Algorithm to use for compress operation */
+ union {
+ struct rte_comp_deflate_params deflate;
+ /**< Parameters specific to the deflate algorithm */
+ }; /**< Algorithm specific parameters */
+ int level;
+ /**< Compression level */
+ uint8_t window_size;
+ /**< Base two log value of sliding window to be used. If window size
+ * can't be supported by the PMD then it may fall back to a smaller
+ * size. This is likely to result in a worse compression ratio.
+ */
+ enum rte_comp_checksum_type chksum;
+ /**< Type of checksum to generate on the uncompressed data */
+ enum rte_comp_hash_algorithm hash_algo;
+ /**< Hash algorithm to be used with compress operation. Hash is always
+ * done on plaintext.
+ */
+};
+
+/**
+ * Setup Data for decompression.
+ */
+struct rte_comp_decompress_xform {
+ enum rte_comp_algorithm algo;
+ /**< Algorithm to use for decompression */
+ enum rte_comp_checksum_type chksum;
+ /**< Type of checksum to generate on the decompressed data */
+ uint8_t window_size;
+ /**< Base two log value of sliding window which was used to generate
+ * compressed data. If window size can't be supported by the PMD then
+ * setup of stream or private_xform should fail.
+ */
+ enum rte_comp_hash_algorithm hash_algo;
+ /**< Hash algorithm to be used with decompress operation. Hash is always
+ * done on plaintext.
+ */
+};
+
+/**
+ * Compression transform structure.
+ *
+ * This is used to specify the compression transforms required.
+ * Each transform structure can hold a single transform, the type field is
+ * used to specify which transform is contained within the union.
+ */
+struct rte_comp_xform {
+ enum rte_comp_xform_type type;
+ /**< xform type */
+ union {
+ struct rte_comp_compress_xform compress;
+ /**< xform for compress operation */
+ struct rte_comp_decompress_xform decompress;
+ /**< decompress xform */
+ };
+};
+
+/**
+ * Compression Operation.
+ *
+ * This structure contains data relating to performing a compression
+ * operation on the referenced mbuf data buffers.
+ *
+ * Comp operations are enqueued and dequeued in comp PMDs using the
+ * rte_compressdev_enqueue_burst() / rte_compressdev_dequeue_burst() APIs
+ */
+struct rte_comp_op {
+ enum rte_comp_op_type op_type;
+ union {
+ void *private_xform;
+ /**< Stateless private PMD data derived from an rte_comp_xform.
+ * A handle returned by rte_compressdev_private_xform_create()
+ * must be attached to operations of op_type RTE_COMP_STATELESS.
+ */
+ void *stream;
+ /**< Private PMD data derived initially from an rte_comp_xform,
+ * which holds state and history data and evolves as operations
+ * are processed. rte_compressdev_stream_create() must be called
+ * on a device for all STATEFUL data streams and the resulting
+ * stream attached to the one or more operations associated
+ * with the data stream.
+ * All operations in a stream must be sent to the same device.
+ */
+ };
+
+ struct rte_mempool *mempool;
+ /**< Pool from which operation is allocated */
+ rte_iova_t iova_addr;
+ /**< IOVA address of this operation */
+ struct rte_mbuf *m_src;
+ /**< source mbuf
+ * The total size of the input buffer(s) can be retrieved using
+ * rte_pktmbuf_data_len(m_src). The max data size which can fit in a
+ * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
+ * If the input data is bigger than this it can be passed to the PMD in
+ * a chain of mbufs if the PMD's capabilities indicate it supports this.
+ */
+ struct rte_mbuf *m_dst;
+ /**< destination mbuf
+ * The total size of the output buffer(s) can be retrieved using
+ * rte_pktmbuf_data_len(m_dst). The max data size which can fit in a
+ * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
+ * If the output data is expected to be bigger than this a chain of
+ * mbufs can be passed to the PMD if the PMD's capabilities indicate
+ * it supports this.
+ */
+
+ struct {
+ uint32_t offset;
+ /**< Starting point for compression or decompression,
+ * specified as number of bytes from start of packet in
+ * source buffer.
+ * This offset starts from the first segment
+ * of the buffer, in case the m_src is a chain of mbufs.
+ * Starting point for checksum generation in compress direction.
+ */
+ uint32_t length;
+ /**< The length, in bytes, of the data in source buffer
+ * to be compressed or decompressed.
+ * Also the length of the data over which the checksum
+ * should be generated in compress direction
+ */
+ } src;
+ struct {
+ uint32_t offset;
+ /**< Starting point for writing output data, specified as
+ * number of bytes from start of packet in dest
+ * buffer.
+ * This offset starts from the first segment
+ * of the buffer, in case the m_dst is a chain of mbufs.
+ * Starting point for checksum generation in
+ * decompress direction.
+ */
+ } dst;
+ struct {
+ uint8_t *digest;
+ /**< Output buffer to store hash output, if enabled in xform.
+ * Buffer would contain valid value only after an op with
+ * flush flag = RTE_COMP_FLUSH_FULL/FLUSH_FINAL is processed
+ * successfully.
+ *
+ * Length of buffer should be contiguous and large enough to
+ * accommodate digest produced by specific hash algo.
+ */
+ rte_iova_t iova_addr;
+ /**< IO address of the buffer */
+ } hash;
+ enum rte_comp_flush_flag flush_flag;
+ /**< Defines flush characteristics for the output data.
+ * Only applicable in compress direction
+ */
+ uint64_t input_chksum;
+ /**< An input checksum can be provided to generate a
+ * cumulative checksum across sequential blocks in a STATELESS stream.
+ * Checksum type is as specified in xform chksum_type
+ */
+ uint64_t output_chksum;
+ /**< If a checksum is generated it will be written in here.
+ * Checksum type is as specified in xform chksum_type.
+ */
+ uint32_t consumed;
+ /**< The number of bytes from the source buffer
+ * which were compressed/decompressed.
+ */
+ uint32_t produced;
+ /**< The number of bytes written to the destination buffer
+ * which were compressed/decompressed.
+ */
+ uint64_t debug_status;
+ /**<
+ * Status of the operation is returned in the status param.
+ * This field allows the PMD to pass back extra
+ * PMD-specific debug information. Its value is not defined by the API.
+ */
+ uint8_t status;
+ /**<
+ * Operation status - use values from enum rte_comp_op_status.
+ * This is reset to
+ * RTE_COMP_OP_STATUS_NOT_PROCESSED on allocation from mempool and
+ * will be set to RTE_COMP_OP_STATUS_SUCCESS after operation
+ * is successfully processed by a PMD
+ */
+} __rte_cache_aligned;
+
+/**
+ * Creates an operation pool
+ *
+ * @param name
+ * Compress pool name
+ * @param nb_elts
+ * Number of elements in pool
+ * @param cache_size
+ * Number of elements to cache on lcore, see
+ * *rte_mempool_create* for further details about cache size
+ * @param user_size
+ * Size of private data to allocate for user with each operation
+ * @param socket_id
+ * Socket identifier to allocate memory on
+ * @return
+ * - On success pointer to mempool
+ * - On failure NULL
+ */
+struct rte_mempool * __rte_experimental
+rte_comp_op_pool_create(const char *name,
+ unsigned int nb_elts, unsigned int cache_size,
+ uint16_t user_size, int socket_id);
+
+/**
+ * Allocate an operation from a mempool with default parameters set
+ *
+ * @param mempool
+ * Compress operation mempool
+ *
+ * @return
+ * - On success returns a valid rte_comp_op structure
+ * - On failure returns NULL
+ */
+struct rte_comp_op * __rte_experimental
+rte_comp_op_alloc(struct rte_mempool *mempool);
+
+/**
+ * Bulk allocate operations from a mempool with default parameters set
+ *
+ * @param mempool
+ * Compress operation mempool
+ * @param ops
+ * Array to place allocated operations
+ * @param nb_ops
+ * Number of operations to allocate
+ * @return
+ * - nb_ops: Success; all requested operations were allocated
+ * - 0: Not enough entries in the mempool; no ops are retrieved.
+ */
+int __rte_experimental
+rte_comp_op_bulk_alloc(struct rte_mempool *mempool,
+ struct rte_comp_op **ops, uint16_t nb_ops);
+
+/**
+ * Free operation structure
+ * If the operation has been allocated from a rte_mempool, then the operation will
+ * be returned to the mempool.
+ *
+ * @param op
+ * Compress operation
+ */
+void __rte_experimental
+rte_comp_op_free(struct rte_comp_op *op);
+
+/**
+ * Get the name of a compress service feature flag
+ *
+ * @param flag
+ * The mask describing the flag
+ *
+ * @return
+ * The name of this flag, or NULL if it's not a valid feature flag.
+ */
+const char * __rte_experimental
+rte_comp_get_feature_name(uint64_t flag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COMP_H_ */
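As a sketch of how the definitions above fit together, the following fills a stateless DEFLATE compression request; the mbufs, the private xform handle and the data length are assumed to be prepared by the caller (illustrative only):

#include <rte_comp.h>

/* Transform used to create the private xform handle via
 * rte_compressdev_private_xform_create() (declared in rte_compressdev.h).
 */
static const struct rte_comp_xform deflate_xform = {
	.type = RTE_COMP_COMPRESS,
	.compress = {
		.algo = RTE_COMP_ALGO_DEFLATE,
		.deflate.huffman = RTE_COMP_HUFFMAN_DEFAULT,
		.level = RTE_COMP_LEVEL_PMD_DEFAULT,
		.window_size = 15,	/* 32 KB window, a common default */
		.chksum = RTE_COMP_CHECKSUM_NONE,
		.hash_algo = RTE_COMP_HASH_ALGO_NONE,
	},
};

static void
fill_deflate_request(struct rte_comp_op *op, struct rte_mbuf *m_src,
		struct rte_mbuf *m_dst, void *priv_xform, uint32_t data_len)
{
	op->op_type = RTE_COMP_OP_STATELESS;
	op->private_xform = priv_xform;
	op->m_src = m_src;
	op->m_dst = m_dst;
	op->src.offset = 0;
	op->src.length = data_len;
	op->dst.offset = 0;
	op->flush_flag = RTE_COMP_FLUSH_FINAL;	/* single, self-contained block */
}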
diff --git a/lib/librte_compressdev/rte_compressdev.c b/lib/librte_compressdev/rte_compressdev.c
new file mode 100644
index 00000000..9091dd6e
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev.c
@@ -0,0 +1,772 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_memzone.h>
+
+#include "rte_compressdev.h"
+#include "rte_compressdev_internal.h"
+#include "rte_compressdev_pmd.h"
+
+#define RTE_COMPRESSDEV_DETACHED (0)
+#define RTE_COMPRESSDEV_ATTACHED (1)
+
+struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS];
+
+struct rte_compressdev *rte_compressdevs = &rte_comp_devices[0];
+
+static struct rte_compressdev_global compressdev_globals = {
+ .devs = &rte_comp_devices[0],
+ .data = { NULL },
+ .nb_devs = 0,
+ .max_devs = RTE_COMPRESS_MAX_DEVS
+};
+
+struct rte_compressdev_global *rte_compressdev_globals = &compressdev_globals;
+
+const struct rte_compressdev_capabilities * __rte_experimental
+rte_compressdev_capability_get(uint8_t dev_id,
+ enum rte_comp_algorithm algo)
+{
+ const struct rte_compressdev_capabilities *capability;
+ struct rte_compressdev_info dev_info;
+ int i = 0;
+
+ if (dev_id >= compressdev_globals.nb_devs) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id);
+ return NULL;
+ }
+ rte_compressdev_info_get(dev_id, &dev_info);
+
+ while ((capability = &dev_info.capabilities[i++])->algo !=
+ RTE_COMP_ALGO_UNSPECIFIED){
+ if (capability->algo == algo)
+ return capability;
+ }
+
+ return NULL;
+}
+
+const char * __rte_experimental
+rte_compressdev_get_feature_name(uint64_t flag)
+{
+ switch (flag) {
+ case RTE_COMPDEV_FF_HW_ACCELERATED:
+ return "HW_ACCELERATED";
+ case RTE_COMPDEV_FF_CPU_SSE:
+ return "CPU_SSE";
+ case RTE_COMPDEV_FF_CPU_AVX:
+ return "CPU_AVX";
+ case RTE_COMPDEV_FF_CPU_AVX2:
+ return "CPU_AVX2";
+ case RTE_COMPDEV_FF_CPU_AVX512:
+ return "CPU_AVX512";
+ case RTE_COMPDEV_FF_CPU_NEON:
+ return "CPU_NEON";
+ default:
+ return NULL;
+ }
+}
+
+static struct rte_compressdev *
+rte_compressdev_get_dev(uint8_t dev_id)
+{
+ return &rte_compressdev_globals->devs[dev_id];
+}
+
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_get_named_dev(const char *name)
+{
+ struct rte_compressdev *dev;
+ unsigned int i;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; i < rte_compressdev_globals->max_devs; i++) {
+ dev = &rte_compressdev_globals->devs[i];
+
+ if ((dev->attached == RTE_COMPRESSDEV_ATTACHED) &&
+ (strcmp(dev->data->name, name) == 0))
+ return dev;
+ }
+
+ return NULL;
+}
+
+static unsigned int
+rte_compressdev_is_valid_dev(uint8_t dev_id)
+{
+ struct rte_compressdev *dev = NULL;
+
+ if (dev_id >= rte_compressdev_globals->nb_devs)
+ return 0;
+
+ dev = rte_compressdev_get_dev(dev_id);
+ if (dev->attached != RTE_COMPRESSDEV_ATTACHED)
+ return 0;
+ else
+ return 1;
+}
+
+
+int __rte_experimental
+rte_compressdev_get_dev_id(const char *name)
+{
+ unsigned int i;
+
+ if (name == NULL)
+ return -1;
+
+ for (i = 0; i < rte_compressdev_globals->nb_devs; i++)
+ if ((strcmp(rte_compressdev_globals->devs[i].data->name, name)
+ == 0) &&
+ (rte_compressdev_globals->devs[i].attached ==
+ RTE_COMPRESSDEV_ATTACHED))
+ return i;
+
+ return -1;
+}
+
+uint8_t __rte_experimental
+rte_compressdev_count(void)
+{
+ return rte_compressdev_globals->nb_devs;
+}
+
+uint8_t __rte_experimental
+rte_compressdev_devices_get(const char *driver_name, uint8_t *devices,
+ uint8_t nb_devices)
+{
+ uint8_t i, count = 0;
+ struct rte_compressdev *devs = rte_compressdev_globals->devs;
+ uint8_t max_devs = rte_compressdev_globals->max_devs;
+
+ for (i = 0; i < max_devs && count < nb_devices; i++) {
+
+ if (devs[i].attached == RTE_COMPRESSDEV_ATTACHED) {
+ int cmp;
+
+ cmp = strncmp(devs[i].device->driver->name,
+ driver_name,
+ strlen(driver_name));
+
+ if (cmp == 0)
+ devices[count++] = devs[i].data->dev_id;
+ }
+ }
+
+ return count;
+}
+
+int __rte_experimental
+rte_compressdev_socket_id(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+
+ if (!rte_compressdev_is_valid_dev(dev_id))
+ return -1;
+
+ dev = rte_compressdev_get_dev(dev_id);
+
+ return dev->data->socket_id;
+}
+
+static inline int
+rte_compressdev_data_alloc(uint8_t dev_id, struct rte_compressdev_data **data,
+ int socket_id)
+{
+ char mz_name[RTE_COMPRESSDEV_NAME_MAX_LEN];
+ const struct rte_memzone *mz;
+ int n;
+
+ /* generate memzone name */
+ n = snprintf(mz_name, sizeof(mz_name),
+ "rte_compressdev_data_%u", dev_id);
+ if (n >= (int)sizeof(mz_name))
+ return -EINVAL;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mz = rte_memzone_reserve(mz_name,
+ sizeof(struct rte_compressdev_data),
+ socket_id, 0);
+ } else
+ mz = rte_memzone_lookup(mz_name);
+
+ if (mz == NULL)
+ return -ENOMEM;
+
+ *data = mz->addr;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ memset(*data, 0, sizeof(struct rte_compressdev_data));
+
+ return 0;
+}
+
+static uint8_t
+rte_compressdev_find_free_device_index(void)
+{
+ uint8_t dev_id;
+
+ for (dev_id = 0; dev_id < RTE_COMPRESS_MAX_DEVS; dev_id++) {
+ if (rte_comp_devices[dev_id].attached ==
+ RTE_COMPRESSDEV_DETACHED)
+ return dev_id;
+ }
+ return RTE_COMPRESS_MAX_DEVS;
+}
+
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_allocate(const char *name, int socket_id)
+{
+ struct rte_compressdev *compressdev;
+ uint8_t dev_id;
+
+ if (rte_compressdev_pmd_get_named_dev(name) != NULL) {
+ COMPRESSDEV_LOG(ERR,
+ "comp device with name %s already allocated!", name);
+ return NULL;
+ }
+
+ dev_id = rte_compressdev_find_free_device_index();
+ if (dev_id == RTE_COMPRESS_MAX_DEVS) {
+ COMPRESSDEV_LOG(ERR, "Reached maximum number of comp devices");
+ return NULL;
+ }
+ compressdev = rte_compressdev_get_dev(dev_id);
+
+ if (compressdev->data == NULL) {
+ struct rte_compressdev_data *compressdev_data =
+ compressdev_globals.data[dev_id];
+
+ int retval = rte_compressdev_data_alloc(dev_id,
+ &compressdev_data, socket_id);
+
+ if (retval < 0 || compressdev_data == NULL)
+ return NULL;
+
+ compressdev->data = compressdev_data;
+
+ snprintf(compressdev->data->name, RTE_COMPRESSDEV_NAME_MAX_LEN,
+ "%s", name);
+
+ compressdev->data->dev_id = dev_id;
+ compressdev->data->socket_id = socket_id;
+ compressdev->data->dev_started = 0;
+
+ compressdev->attached = RTE_COMPRESSDEV_ATTACHED;
+
+ compressdev_globals.nb_devs++;
+ }
+
+ return compressdev;
+}
+
+int __rte_experimental
+rte_compressdev_pmd_release_device(struct rte_compressdev *compressdev)
+{
+ int ret;
+
+ if (compressdev == NULL)
+ return -EINVAL;
+
+ /* Close device only if device operations have been set */
+ if (compressdev->dev_ops) {
+ ret = rte_compressdev_close(compressdev->data->dev_id);
+ if (ret < 0)
+ return ret;
+ }
+
+ compressdev->attached = RTE_COMPRESSDEV_DETACHED;
+ compressdev_globals.nb_devs--;
+ return 0;
+}
+
+uint16_t __rte_experimental
+rte_compressdev_queue_pair_count(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+
+ dev = &rte_comp_devices[dev_id];
+ return dev->data->nb_queue_pairs;
+}
+
+static int
+rte_compressdev_queue_pairs_config(struct rte_compressdev *dev,
+ uint16_t nb_qpairs, int socket_id)
+{
+ struct rte_compressdev_info dev_info;
+ void **qp;
+ unsigned int i;
+
+ if ((dev == NULL) || (nb_qpairs < 1)) {
+ COMPRESSDEV_LOG(ERR, "invalid param: dev %p, nb_queues %u",
+ dev, nb_qpairs);
+ return -EINVAL;
+ }
+
+ COMPRESSDEV_LOG(DEBUG, "Setup %d queues pairs on device %u",
+ nb_qpairs, dev->data->dev_id);
+
+ memset(&dev_info, 0, sizeof(struct rte_compressdev_info));
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+ (*dev->dev_ops->dev_infos_get)(dev, &dev_info);
+
+ if ((dev_info.max_nb_queue_pairs != 0) &&
+ (nb_qpairs > dev_info.max_nb_queue_pairs)) {
+ COMPRESSDEV_LOG(ERR, "Invalid num queue_pairs (%u) for dev %u",
+ nb_qpairs, dev->data->dev_id);
+ return -EINVAL;
+ }
+
+ if (dev->data->queue_pairs == NULL) { /* first time configuration */
+ dev->data->queue_pairs = rte_zmalloc_socket(
+ "compressdev->queue_pairs",
+ sizeof(dev->data->queue_pairs[0]) * nb_qpairs,
+ RTE_CACHE_LINE_SIZE, socket_id);
+
+ if (dev->data->queue_pairs == NULL) {
+ dev->data->nb_queue_pairs = 0;
+ COMPRESSDEV_LOG(ERR,
+ "failed to get memory for qp meta data, nb_queues %u",
+ nb_qpairs);
+ return -(ENOMEM);
+ }
+ } else { /* re-configure */
+ int ret;
+ uint16_t old_nb_queues = dev->data->nb_queue_pairs;
+
+ qp = dev->data->queue_pairs;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release,
+ -ENOTSUP);
+
+ for (i = nb_qpairs; i < old_nb_queues; i++) {
+ ret = (*dev->dev_ops->queue_pair_release)(dev, i);
+ if (ret < 0)
+ return ret;
+ }
+
+ qp = rte_realloc(qp, sizeof(qp[0]) * nb_qpairs,
+ RTE_CACHE_LINE_SIZE);
+ if (qp == NULL) {
+ COMPRESSDEV_LOG(ERR,
+ "failed to realloc qp meta data, nb_queues %u",
+ nb_qpairs);
+ return -(ENOMEM);
+ }
+
+ if (nb_qpairs > old_nb_queues) {
+ uint16_t new_qs = nb_qpairs - old_nb_queues;
+
+ memset(qp + old_nb_queues, 0,
+ sizeof(qp[0]) * new_qs);
+ }
+
+ dev->data->queue_pairs = qp;
+
+ }
+ dev->data->nb_queue_pairs = nb_qpairs;
+ return 0;
+}
+
+static int
+rte_compressdev_queue_pairs_release(struct rte_compressdev *dev)
+{
+ uint16_t num_qps, i;
+ int ret;
+
+ if (dev == NULL) {
+ COMPRESSDEV_LOG(ERR, "invalid param: dev %p", dev);
+ return -EINVAL;
+ }
+
+ num_qps = dev->data->nb_queue_pairs;
+
+ if (num_qps == 0)
+ return 0;
+
+ COMPRESSDEV_LOG(DEBUG, "Free %d queues pairs on device %u",
+ dev->data->nb_queue_pairs, dev->data->dev_id);
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release,
+ -ENOTSUP);
+
+ for (i = 0; i < num_qps; i++) {
+ ret = (*dev->dev_ops->queue_pair_release)(dev, i);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (dev->data->queue_pairs != NULL)
+ rte_free(dev->data->queue_pairs);
+ dev->data->queue_pairs = NULL;
+ dev->data->nb_queue_pairs = 0;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_compressdev_configure(uint8_t dev_id, struct rte_compressdev_config *config)
+{
+ struct rte_compressdev *dev;
+ int diag;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return -EINVAL;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ if (dev->data->dev_started) {
+ COMPRESSDEV_LOG(ERR,
+ "device %d must be stopped to allow configuration", dev_id);
+ return -EBUSY;
+ }
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
+
+ /* Setup new number of queue pairs and reconfigure device. */
+ diag = rte_compressdev_queue_pairs_config(dev, config->nb_queue_pairs,
+ config->socket_id);
+ if (diag != 0) {
+ COMPRESSDEV_LOG(ERR,
+ "dev%d rte_comp_dev_queue_pairs_config = %d",
+ dev_id, diag);
+ return diag;
+ }
+
+ return (*dev->dev_ops->dev_configure)(dev, config);
+}
+
+int __rte_experimental
+rte_compressdev_start(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+ int diag;
+
+ COMPRESSDEV_LOG(DEBUG, "Start dev_id=%" PRIu8, dev_id);
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return -EINVAL;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
+
+ if (dev->data->dev_started != 0) {
+ COMPRESSDEV_LOG(ERR,
+ "Device with dev_id=%" PRIu8 " already started", dev_id);
+ return 0;
+ }
+
+ diag = (*dev->dev_ops->dev_start)(dev);
+ if (diag == 0)
+ dev->data->dev_started = 1;
+ else
+ return diag;
+
+ return 0;
+}
+
+void __rte_experimental
+rte_compressdev_stop(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
+
+ if (dev->data->dev_started == 0) {
+ COMPRESSDEV_LOG(ERR,
+ "Device with dev_id=%" PRIu8 " already stopped", dev_id);
+ return;
+ }
+
+ (*dev->dev_ops->dev_stop)(dev);
+ dev->data->dev_started = 0;
+}
+
+int __rte_experimental
+rte_compressdev_close(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+ int retval;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return -1;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ /* Device must be stopped before it can be closed */
+ if (dev->data->dev_started == 1) {
+ COMPRESSDEV_LOG(ERR, "Device %u must be stopped before closing",
+ dev_id);
+ return -EBUSY;
+ }
+
+ /* Free queue pairs memory */
+ retval = rte_compressdev_queue_pairs_release(dev);
+
+ if (retval < 0)
+ return retval;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP);
+ retval = (*dev->dev_ops->dev_close)(dev);
+
+ if (retval < 0)
+ return retval;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_compressdev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
+ uint32_t max_inflight_ops, int socket_id)
+{
+ struct rte_compressdev *dev;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return -EINVAL;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+ if (queue_pair_id >= dev->data->nb_queue_pairs) {
+ COMPRESSDEV_LOG(ERR, "Invalid queue_pair_id=%d", queue_pair_id);
+ return -EINVAL;
+ }
+
+ if (dev->data->dev_started) {
+ COMPRESSDEV_LOG(ERR,
+ "device %d must be stopped to allow configuration", dev_id);
+ return -EBUSY;
+ }
+
+ if (max_inflight_ops == 0) {
+ COMPRESSDEV_LOG(ERR,
+ "Invalid maximum number of inflight operations");
+ return -EINVAL;
+ }
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP);
+
+ return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id,
+ max_inflight_ops, socket_id);
+}
+
+uint16_t __rte_experimental
+rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id,
+ struct rte_comp_op **ops, uint16_t nb_ops)
+{
+ struct rte_compressdev *dev = &rte_compressdevs[dev_id];
+
+ nb_ops = (*dev->dequeue_burst)
+ (dev->data->queue_pairs[qp_id], ops, nb_ops);
+
+ return nb_ops;
+}
+
+uint16_t __rte_experimental
+rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
+ struct rte_comp_op **ops, uint16_t nb_ops)
+{
+ struct rte_compressdev *dev = &rte_compressdevs[dev_id];
+
+ return (*dev->enqueue_burst)(
+ dev->data->queue_pairs[qp_id], ops, nb_ops);
+}
+
+int __rte_experimental
+rte_compressdev_stats_get(uint8_t dev_id, struct rte_compressdev_stats *stats)
+{
+ struct rte_compressdev *dev;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id);
+ return -ENODEV;
+ }
+
+ if (stats == NULL) {
+ COMPRESSDEV_LOG(ERR, "Invalid stats ptr");
+ return -EINVAL;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+ memset(stats, 0, sizeof(*stats));
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP);
+ (*dev->dev_ops->stats_get)(dev, stats);
+ return 0;
+}
+
+void __rte_experimental
+rte_compressdev_stats_reset(uint8_t dev_id)
+{
+ struct rte_compressdev *dev;
+
+ if (!rte_compressdev_is_valid_dev(dev_id)) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%" PRIu8, dev_id);
+ return;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset);
+ (*dev->dev_ops->stats_reset)(dev);
+}
+
+
+void __rte_experimental
+rte_compressdev_info_get(uint8_t dev_id, struct rte_compressdev_info *dev_info)
+{
+ struct rte_compressdev *dev;
+
+ if (dev_id >= compressdev_globals.nb_devs) {
+ COMPRESSDEV_LOG(ERR, "Invalid dev_id=%d", dev_id);
+ return;
+ }
+
+ dev = &rte_comp_devices[dev_id];
+
+ memset(dev_info, 0, sizeof(struct rte_compressdev_info));
+
+ RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
+ (*dev->dev_ops->dev_infos_get)(dev, dev_info);
+
+ dev_info->driver_name = dev->device->driver->name;
+}
+
+int __rte_experimental
+rte_compressdev_private_xform_create(uint8_t dev_id,
+ const struct rte_comp_xform *xform,
+ void **priv_xform)
+{
+ struct rte_compressdev *dev;
+ int ret;
+
+ dev = rte_compressdev_get_dev(dev_id);
+
+ if (xform == NULL || priv_xform == NULL || dev == NULL)
+ return -EINVAL;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->private_xform_create, -ENOTSUP);
+ ret = (*dev->dev_ops->private_xform_create)(dev, xform, priv_xform);
+ if (ret < 0) {
+ COMPRESSDEV_LOG(ERR,
+ "dev_id %d failed to create private_xform: err=%d",
+ dev_id, ret);
+ return ret;
+	}
+
+ return 0;
+}
+
+int __rte_experimental
+rte_compressdev_private_xform_free(uint8_t dev_id, void *priv_xform)
+{
+ struct rte_compressdev *dev;
+ int ret;
+
+ dev = rte_compressdev_get_dev(dev_id);
+
+ if (dev == NULL || priv_xform == NULL)
+ return -EINVAL;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->private_xform_free, -ENOTSUP);
+ ret = dev->dev_ops->private_xform_free(dev, priv_xform);
+ if (ret < 0) {
+ COMPRESSDEV_LOG(ERR,
+ "dev_id %d failed to free private xform: err=%d",
+ dev_id, ret);
+ return ret;
+	}
+
+ return 0;
+}
+
+int __rte_experimental
+rte_compressdev_stream_create(uint8_t dev_id,
+ const struct rte_comp_xform *xform,
+ void **stream)
+{
+ struct rte_compressdev *dev;
+ int ret;
+
+ dev = rte_compressdev_get_dev(dev_id);
+
+ if (xform == NULL || dev == NULL || stream == NULL)
+ return -EINVAL;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stream_create, -ENOTSUP);
+ ret = (*dev->dev_ops->stream_create)(dev, xform, stream);
+ if (ret < 0) {
+ COMPRESSDEV_LOG(ERR,
+ "dev_id %d failed to create stream: err=%d",
+ dev_id, ret);
+ return ret;
+	}
+
+ return 0;
+}
+
+
+int __rte_experimental
+rte_compressdev_stream_free(uint8_t dev_id, void *stream)
+{
+ struct rte_compressdev *dev;
+ int ret;
+
+ dev = rte_compressdev_get_dev(dev_id);
+
+ if (dev == NULL || stream == NULL)
+ return -EINVAL;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stream_free, -ENOTSUP);
+ ret = dev->dev_ops->stream_free(dev, stream);
+ if (ret < 0) {
+ COMPRESSDEV_LOG(ERR,
+ "dev_id %d failed to free stream: err=%d",
+ dev_id, ret);
+ return ret;
+	}
+
+ return 0;
+}
+
+const char * __rte_experimental
+rte_compressdev_name_get(uint8_t dev_id)
+{
+ struct rte_compressdev *dev = rte_compressdev_get_dev(dev_id);
+
+ if (dev == NULL)
+ return NULL;
+
+ return dev->data->name;
+}
+
+RTE_INIT(rte_compressdev_log)
+{
+ compressdev_logtype = rte_log_register("lib.compressdev");
+ if (compressdev_logtype >= 0)
+ rte_log_set_level(compressdev_logtype, RTE_LOG_NOTICE);
+}
diff --git a/lib/librte_compressdev/rte_compressdev.h b/lib/librte_compressdev/rte_compressdev.h
new file mode 100644
index 00000000..5b4fca4d
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev.h
@@ -0,0 +1,540 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef _RTE_COMPRESSDEV_H_
+#define _RTE_COMPRESSDEV_H_
+
+/**
+ * @file rte_compressdev.h
+ *
+ * RTE Compression Device APIs
+ *
+ * Defines comp device APIs for the provisioning of compression operations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#include "rte_comp.h"
+
+/**
+ * Parameter log base 2 range description.
+ * Final value will be 2^value.
+ */
+struct rte_param_log2_range {
+ uint8_t min; /**< Minimum log2 value */
+ uint8_t max; /**< Maximum log2 value */
+ uint8_t increment;
+	/**< If a range of sizes is supported,
+	 * this parameter indicates the supported
+	 * increments, in base 2 log byte values,
+	 * between the minimum and maximum
+	 */
+};
+
+/** Structure used to capture a capability of a comp device */
+struct rte_compressdev_capabilities {
+ enum rte_comp_algorithm algo;
+	/**< Compression algorithm */
+ uint64_t comp_feature_flags;
+ /**< Bitmask of flags for compression service features */
+ struct rte_param_log2_range window_size;
+ /**< Window size range in base two log byte values */
+};
+
+/** Macro used at end of comp PMD list */
+#define RTE_COMP_END_OF_CAPABILITIES_LIST() \
+ { RTE_COMP_ALGO_UNSPECIFIED }
+
+const struct rte_compressdev_capabilities * __rte_experimental
+rte_compressdev_capability_get(uint8_t dev_id,
+ enum rte_comp_algorithm algo);
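
As a rough illustration of the entries returned by rte_compressdev_capability_get(), the sketch below (not part of this patch) shows how a hypothetical DEFLATE-only PMD might declare its capability array, terminated with RTE_COMP_END_OF_CAPABILITIES_LIST(); RTE_COMP_ALGO_DEFLATE and the RTE_COMP_FF_* feature flags are assumed to come from rte_comp.h.

#include <rte_compressdev.h>

/* Hypothetical PMD capability table: window sizes 2^8..2^15 bytes. */
static const struct rte_compressdev_capabilities my_pmd_capabilities[] = {
	{
		.algo = RTE_COMP_ALGO_DEFLATE,
		.comp_feature_flags = 0, /* OR in RTE_COMP_FF_* flags here */
		.window_size = { .min = 8, .max = 15, .increment = 1 },
	},
	RTE_COMP_END_OF_CAPABILITIES_LIST()
};
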
+
+/**
+ * compression device supported feature flags
+ *
+ * @note New features flags should be added to the end of the list
+ *
+ * Keep these flags synchronised with rte_compressdev_get_feature_name()
+ */
+#define RTE_COMPDEV_FF_HW_ACCELERATED (1ULL << 0)
+/**< Operations are off-loaded to an external hardware accelerator */
+#define RTE_COMPDEV_FF_CPU_SSE (1ULL << 1)
+/**< Utilises CPU SIMD SSE instructions */
+#define RTE_COMPDEV_FF_CPU_AVX (1ULL << 2)
+/**< Utilises CPU SIMD AVX instructions */
+#define RTE_COMPDEV_FF_CPU_AVX2 (1ULL << 3)
+/**< Utilises CPU SIMD AVX2 instructions */
+#define RTE_COMPDEV_FF_CPU_AVX512 (1ULL << 4)
+/**< Utilises CPU SIMD AVX512 instructions */
+#define RTE_COMPDEV_FF_CPU_NEON (1ULL << 5)
+/**< Utilises CPU NEON instructions */
+
+/**
+ * Get the name of a compress device feature flag.
+ *
+ * @param flag
+ * The mask describing the flag
+ *
+ * @return
+ * The name of this flag, or NULL if it's not a valid feature flag.
+ */
+const char * __rte_experimental
+rte_compressdev_get_feature_name(uint64_t flag);
+
+/** comp device information */
+struct rte_compressdev_info {
+ const char *driver_name; /**< Driver name. */
+ uint64_t feature_flags; /**< Feature flags */
+ const struct rte_compressdev_capabilities *capabilities;
+	/**< Array of the device's supported capabilities */
+ uint16_t max_nb_queue_pairs;
+	/**< Maximum number of queue pairs supported by device.
+	 * (If 0, there is no limit on the maximum number of queue pairs)
+	 */
+};
+
+/** comp device statistics */
+struct rte_compressdev_stats {
+ uint64_t enqueued_count;
+ /**< Count of all operations enqueued */
+ uint64_t dequeued_count;
+ /**< Count of all operations dequeued */
+
+ uint64_t enqueue_err_count;
+ /**< Total error count on operations enqueued */
+ uint64_t dequeue_err_count;
+ /**< Total error count on operations dequeued */
+};
+
+
+/**
+ * Get the device identifier for the named compress device.
+ *
+ * @param name
+ * Device name to select the device structure
+ * @return
+ * - Returns compress device identifier on success.
+ * - Return -1 on failure to find named compress device.
+ */
+int __rte_experimental
+rte_compressdev_get_dev_id(const char *name);
+
+/**
+ * Get the compress device name given a device identifier.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @return
+ * - Returns compress device name.
+ * - Returns NULL if compress device is not present.
+ */
+const char * __rte_experimental
+rte_compressdev_name_get(uint8_t dev_id);
+
+/**
+ * Get the total number of compress devices that have been successfully
+ * initialised.
+ *
+ * @return
+ * - The total number of usable compress devices.
+ */
+uint8_t __rte_experimental
+rte_compressdev_count(void);
+
+/**
+ * Get number and identifiers of attached comp devices that
+ * use the same compress driver.
+ *
+ * @param driver_name
+ * Driver name
+ * @param devices
+ * Output devices identifiers
+ * @param nb_devices
+ *   Maximum number of identifiers that can be stored in *devices*
+ *
+ * @return
+ * Returns number of attached compress devices.
+ */
+uint8_t __rte_experimental
+rte_compressdev_devices_get(const char *driver_name, uint8_t *devices,
+ uint8_t nb_devices);
+
+/**
+ * Return the NUMA socket to which a device is connected.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @return
+ * The NUMA socket id to which the device is connected or
+ * a default of zero if the socket could not be determined.
+ *   -1 if the dev_id value is out of range.
+ */
+int __rte_experimental
+rte_compressdev_socket_id(uint8_t dev_id);
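
Not part of the patch: a minimal sketch showing how an application might list the compress devices initialised by EAL, using only the query functions declared above and assuming device ids are contiguous from zero, as they are when devices are allocated at startup.

#include <stdio.h>
#include <rte_compressdev.h>

static void
list_compressdevs(void)
{
	uint8_t i, nb_devs = rte_compressdev_count();

	for (i = 0; i < nb_devs; i++)
		printf("compressdev %u: name=%s socket=%d\n", i,
			rte_compressdev_name_get(i),
			rte_compressdev_socket_id(i));
}
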
+
+/** Compress device configuration structure */
+struct rte_compressdev_config {
+ int socket_id;
+ /**< Socket on which to allocate resources */
+ uint16_t nb_queue_pairs;
+ /**< Total number of queue pairs to configure on a device */
+ uint16_t max_nb_priv_xforms;
+ /**< Max number of private_xforms which will be created on the device */
+ uint16_t max_nb_streams;
+ /**< Max number of streams which will be created on the device */
+};
+
+/**
+ * Configure a device.
+ *
+ * This function must be invoked first before any other function in the
+ * API. This function can also be re-invoked when a device is in the
+ * stopped state.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param config
+ * The compress device configuration
+ * @return
+ * - 0: Success, device configured.
+ * - <0: Error code returned by the driver configuration function.
+ */
+int __rte_experimental
+rte_compressdev_configure(uint8_t dev_id,
+ struct rte_compressdev_config *config);
+
+/**
+ * Start a device.
+ *
+ * The device start step is called after configuring the device and setting up
+ * its queue pairs.
+ * On success, data-path functions exported by the API (enqueue/dequeue, etc)
+ * can be invoked.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @return
+ * - 0: Success, device started.
+ * - <0: Error code of the driver device start function.
+ */
+int __rte_experimental
+rte_compressdev_start(uint8_t dev_id);
+
+/**
+ * Stop a device. The device can be restarted with a call to
+ * rte_compressdev_start()
+ *
+ * @param dev_id
+ * Compress device identifier
+ */
+void __rte_experimental
+rte_compressdev_stop(uint8_t dev_id);
+
+/**
+ * Close a device.
+ * The memory allocated in the device is freed.
+ * After calling this function, the device must be
+ * configured again before it can be used.
+ *
+ * @param dev_id
+ * Compress device identifier
+ *
+ * @return
+ * - 0 on successfully closing device
+ * - <0 on failure to close device
+ */
+int __rte_experimental
+rte_compressdev_close(uint8_t dev_id);
+
+/**
+ * Allocate and set up a receive queue pair for a device.
+ * This should only be called when the device is stopped.
+ *
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param queue_pair_id
+ *   The index of the queue pair to set up. The
+ * value must be in the range [0, nb_queue_pair - 1]
+ * previously supplied to rte_compressdev_configure()
+ * @param max_inflight_ops
+ * Max number of ops which the qp will have to
+ * accommodate simultaneously
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier
+ * in case of NUMA. The value can be *SOCKET_ID_ANY*
+ * if there is no NUMA constraint for the DMA memory
+ * allocated for the receive queue pair
+ * @return
+ * - 0: Success, queue pair correctly set up.
+ * - <0: Queue pair configuration failed
+ */
+int __rte_experimental
+rte_compressdev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
+ uint32_t max_inflight_ops, int socket_id);
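
A minimal configuration sequence, sketched here for illustration only and not part of this patch: configure the device for one queue pair, set the queue pair up on the device's own socket, then start it. The inflight-op count of 512 and the xform/stream limits are arbitrary example values.

#include <rte_compressdev.h>

static int
setup_compressdev(uint8_t dev_id)
{
	struct rte_compressdev_config cfg = {
		.socket_id = rte_compressdev_socket_id(dev_id),
		.nb_queue_pairs = 1,
		.max_nb_priv_xforms = 16,
		.max_nb_streams = 0,	/* stateless use only */
	};
	int ret;

	ret = rte_compressdev_configure(dev_id, &cfg);
	if (ret < 0)
		return ret;

	ret = rte_compressdev_queue_pair_setup(dev_id, 0, 512, cfg.socket_id);
	if (ret < 0)
		return ret;

	return rte_compressdev_start(dev_id);
}
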
+
+/**
+ * Get the number of queue pairs on a specific comp device
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @return
+ * - The number of configured queue pairs.
+ */
+uint16_t __rte_experimental
+rte_compressdev_queue_pair_count(uint8_t dev_id);
+
+
+/**
+ * Retrieve the general I/O statistics of a device.
+ *
+ * @param dev_id
+ * The identifier of the device
+ * @param stats
+ * A pointer to a structure of type
+ * *rte_compressdev_stats* to be filled with the
+ * values of device counters
+ * @return
+ * - Zero if successful.
+ * - Non-zero otherwise.
+ */
+int __rte_experimental
+rte_compressdev_stats_get(uint8_t dev_id, struct rte_compressdev_stats *stats);
+
+/**
+ * Reset the general I/O statistics of a device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ */
+void __rte_experimental
+rte_compressdev_stats_reset(uint8_t dev_id);
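
For illustration (not part of this patch), dumping and then clearing the counters defined in struct rte_compressdev_stats might look like this:

#include <inttypes.h>
#include <stdio.h>
#include <rte_compressdev.h>

static void
dump_and_clear_stats(uint8_t dev_id)
{
	struct rte_compressdev_stats stats;

	if (rte_compressdev_stats_get(dev_id, &stats) != 0)
		return;

	printf("enq=%" PRIu64 " deq=%" PRIu64
		" enq_err=%" PRIu64 " deq_err=%" PRIu64 "\n",
		stats.enqueued_count, stats.dequeued_count,
		stats.enqueue_err_count, stats.dequeue_err_count);

	rte_compressdev_stats_reset(dev_id);
}
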
+
+/**
+ * Retrieve the contextual information of a device.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param dev_info
+ * A pointer to a structure of type *rte_compressdev_info*
+ * to be filled with the contextual information of the device
+ *
+ * @note The capabilities field of dev_info is set to point to the first
+ * element of an array of struct rte_compressdev_capabilities.
+ * The element after the last valid element has it's op field set to
+ * The element after the last valid element has its algo field set to
+ * RTE_COMP_ALGO_UNSPECIFIED (see RTE_COMP_END_OF_CAPABILITIES_LIST()).
+void __rte_experimental
+rte_compressdev_info_get(uint8_t dev_id, struct rte_compressdev_info *dev_info);
+
+/**
+ *
+ * Dequeue a burst of processed compression operations from a queue on the comp
+ * device. The dequeued operations are stored in *rte_comp_op* structures
+ * whose pointers are supplied in the *ops* array.
+ *
+ * The rte_compressdev_dequeue_burst() function returns the number of ops
+ * actually dequeued, which is the number of *rte_comp_op* data structures
+ * effectively supplied into the *ops* array.
+ *
+ * A return value equal to *nb_ops* indicates that the queue contained
+ * at least *nb_ops* operations, and this is likely to signify that other
+ * processed operations remain in the device's output queue. Applications
+ * implementing a "retrieve as many processed operations as possible" policy
+ * can check this specific case and keep invoking the
+ * rte_compressdev_dequeue_burst() function until a value less than
+ * *nb_ops* is returned.
+ *
+ * The rte_compressdev_dequeue_burst() function does not provide any error
+ * notification to avoid the corresponding overhead.
+ *
+ * @note: operation ordering is not maintained within the queue pair.
+ *
+ * @note: In case op status = OUT_OF_SPACE_TERMINATED, op.consumed=0 and the
+ * op must be resubmitted with the same input data and a larger output buffer.
+ * op.produced is usually 0, but in decompression cases a PMD may return > 0
+ * and the application may find it useful to inspect that data.
+ * This status is only returned on STATELESS ops.
+ *
+ * @note: In case op status = OUT_OF_SPACE_RECOVERABLE, op.produced can be used
+ * and next op in stream should continue on from op.consumed+1 with a fresh
+ * output buffer.
+ * Consumed=0, produced=0 is an unusual but allowed case. There may be useful
+ * state/history stored in the PMD, even though no output was produced yet.
+ *
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param qp_id
+ * The index of the queue pair from which to retrieve
+ * processed operations. The value must be in the range
+ * [0, nb_queue_pair - 1] previously supplied to
+ * rte_compressdev_configure()
+ * @param ops
+ * The address of an array of pointers to
+ * *rte_comp_op* structures that must be
+ * large enough to store *nb_ops* pointers in it
+ * @param nb_ops
+ * The maximum number of operations to dequeue
+ * @return
+ * - The number of operations actually dequeued, which is the number
+ * of pointers to *rte_comp_op* structures effectively supplied to the
+ * *ops* array.
+ */
+uint16_t __rte_experimental
+rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id,
+ struct rte_comp_op **ops, uint16_t nb_ops);
+
+/**
+ * Enqueue a burst of operations for processing on a compression device.
+ *
+ * The rte_compressdev_enqueue_burst() function is invoked to place
+ * comp operations on the queue *qp_id* of the device designated by
+ * its *dev_id*.
+ *
+ * The *nb_ops* parameter is the number of operations to process which are
+ * supplied in the *ops* array of *rte_comp_op* structures.
+ *
+ * The rte_compressdev_enqueue_burst() function returns the number of
+ * operations it actually enqueued for processing. A return value equal to
+ * *nb_ops* means that all packets have been enqueued.
+ *
+ * @note All compression operations are Out-of-place (OOP) operations,
+ * as the size of the output data is different to the size of the input data.
+ *
+ * @note The flush flag only applies to operations which return SUCCESS.
+ * In OUT_OF_SPACE cases, whether STATEFUL or STATELESS, the data in the
+ * destination buffer is as if the flush flag was FLUSH_NONE.
+ * @note The flush flag only applies in the compression direction; it has no
+ * meaning for decompression.
+ * @note Operation ordering is not maintained within the queue pair.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param qp_id
+ * The index of the queue pair on which operations
+ * are to be enqueued for processing. The value
+ * must be in the range [0, nb_queue_pairs - 1]
+ * previously supplied to *rte_compressdev_configure*
+ * @param ops
+ * The address of an array of *nb_ops* pointers
+ * to *rte_comp_op* structures which contain
+ * the operations to be processed
+ * @param nb_ops
+ * The number of operations to process
+ * @return
+ * The number of operations actually enqueued on the device. The return
+ * value can be less than the value of the *nb_ops* parameter when the
+ *   comp device's queue is full or if invalid parameters are specified in
+ * a *rte_comp_op*.
+ */
+uint16_t __rte_experimental
+rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
+ struct rte_comp_op **ops, uint16_t nb_ops);
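
The burst API described above is typically driven by a simple poll loop. The sketch below, not part of this patch, enqueues a burst of ops prepared by the caller and polls the same queue pair until every op has come back; the RTE_COMP_OP_STATUS_SUCCESS check assumes the status constants and the op status field defined in rte_comp.h for this release.

#include <rte_compressdev.h>

/* Process nb_ops prepared ops synchronously; returns the number that succeeded. */
static uint16_t
run_burst(uint8_t dev_id, uint16_t qp_id,
	  struct rte_comp_op **ops, uint16_t nb_ops)
{
	struct rte_comp_op *done[64];
	uint16_t enq = 0, deq = 0, ok = 0, n, i;

	while (deq < nb_ops) {
		if (enq < nb_ops)
			enq += rte_compressdev_enqueue_burst(dev_id, qp_id,
					&ops[enq], nb_ops - enq);

		n = rte_compressdev_dequeue_burst(dev_id, qp_id, done,
				RTE_DIM(done));
		for (i = 0; i < n; i++)
			if (done[i]->status == RTE_COMP_OP_STATUS_SUCCESS)
				ok++;
		deq += n;
	}
	return ok;
}
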
+
+/**
+ * Allocate a stream from the device's mempool and initialise it.
+ * The application should call this API when setting up for the stateful
+ * processing of a set of data on a device. The API can be called multiple
+ * times to set up a stream for each data set. The handle returned is only for
+ * use with ops of op_type STATEFUL and must be passed to the PMD
+ * with every op in the data stream
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param xform
+ * xform data
+ * @param stream
+ * Pointer to where PMD's private stream handle should be stored
+ *
+ * @return
+ * - 0 if successful and valid stream handle
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support STATEFUL operations.
+ * - Returns -ENOTSUP if comp device does not support the comp transform.
+ * - Returns -ENOMEM if the private stream could not be allocated.
+ *
+ */
+int __rte_experimental
+rte_compressdev_stream_create(uint8_t dev_id,
+ const struct rte_comp_xform *xform,
+ void **stream);
+
+/**
+ * Clear the stream and return it to the device's mempool.
+ *
+ * @param dev_id
+ * Compress device identifier
+ *
+ * @param stream
+ * PMD's private stream data
+ *
+ * @return
+ * - 0 if successful
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support STATEFUL operations.
+ *  - Returns -EBUSY if the stream cannot be freed due to in-flight operations
+ */
+int __rte_experimental
+rte_compressdev_stream_free(uint8_t dev_id, void *stream);
+
+/**
+ * Allocate a private_xform from the device's mempool and initialise it.
+ * The application should call this API when setting up for stateless
+ * processing on a device. If the device returns a non-shareable handle,
+ * the application cannot share it between multiple in-flight ops and
+ * should call this API again to get a separate handle for every in-flight op.
+ * The handle returned is only valid for use with ops of op_type STATELESS.
+ *
+ * @param dev_id
+ * Compress device identifier
+ * @param xform
+ * xform data
+ * @param private_xform
+ * Pointer to where PMD's private_xform handle should be stored
+ *
+ * @return
+ * - if successful returns 0
+ * and valid private_xform handle
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support the comp transform.
+ * - Returns -ENOMEM if the private_xform could not be allocated.
+ */
+int __rte_experimental
+rte_compressdev_private_xform_create(uint8_t dev_id,
+ const struct rte_comp_xform *xform,
+ void **private_xform);
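
Sketch only, not part of the patch: building a DEFLATE compression xform and turning it into a private_xform handle for stateless ops. The rte_comp_xform field and constant names used here (type, compress.algo, deflate.huffman, level, window_size, chksum and the RTE_COMP_* values) are assumed from rte_comp.h in this release.

#include <rte_comp.h>
#include <rte_compressdev.h>

static int
make_deflate_priv_xform(uint8_t dev_id, void **priv_xform)
{
	struct rte_comp_xform xform = {
		.type = RTE_COMP_COMPRESS,
		.compress = {
			.algo = RTE_COMP_ALGO_DEFLATE,
			.deflate.huffman = RTE_COMP_HUFFMAN_DEFAULT,
			.level = RTE_COMP_LEVEL_PMD_DEFAULT,
			.window_size = 15,
			.chksum = RTE_COMP_CHECKSUM_NONE,
		},
	};

	return rte_compressdev_private_xform_create(dev_id, &xform,
			priv_xform);
}
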
+
+/**
+ * Clear the private_xform and return it to the device's mempool.
+ * It is the application's responsibility to ensure that private_xform data
+ * is not cleared while there are still in-flight operations using it.
+ *
+ * @param dev_id
+ * Compress device identifier
+ *
+ * @param private_xform
+ * PMD's private_xform data
+ *
+ * @return
+ * - 0 if successful
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ */
+int __rte_experimental
+rte_compressdev_private_xform_free(uint8_t dev_id, void *private_xform);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COMPRESSDEV_H_ */
diff --git a/lib/librte_compressdev/rte_compressdev_internal.h b/lib/librte_compressdev/rte_compressdev_internal.h
new file mode 100644
index 00000000..22ceac66
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev_internal.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef _RTE_COMPRESSDEV_INTERNAL_H_
+#define _RTE_COMPRESSDEV_INTERNAL_H_
+
+/* rte_compressdev_internal.h
+ * This file holds Compressdev private data structures.
+ */
+#include <rte_log.h>
+
+#include "rte_comp.h"
+
+#define RTE_COMPRESSDEV_NAME_MAX_LEN (64)
+/**< Max length of name of comp PMD */
+
+/* Logging Macros */
+extern int compressdev_logtype;
+#define COMPRESSDEV_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, compressdev_logtype, "%s(): "fmt "\n", \
+ __func__, ##args)
+
+/**
+ * Dequeue processed packets from queue pair of a device.
+ *
+ * @param qp
+ * The queue pair from which to retrieve
+ * processed operations.
+ * @param ops
+ * The address of an array of pointers to
+ * *rte_comp_op* structures that must be
+ * large enough to store *nb_ops* pointers in it
+ * @param nb_ops
+ * The maximum number of operations to dequeue
+ * @return
+ * - The number of operations actually dequeued, which is the number
+ * of pointers to *rte_comp_op* structures effectively supplied to the
+ * *ops* array.
+ */
+typedef uint16_t (*compressdev_dequeue_pkt_burst_t)(void *qp,
+ struct rte_comp_op **ops, uint16_t nb_ops);
+
+/**
+ * Enqueue a burst of operations for processing.
+ *
+ * @param qp
+ * The queue pair on which operations
+ * are to be enqueued for processing
+ * @param ops
+ * The address of an array of *nb_ops* pointers
+ * to *rte_comp_op* structures which contain
+ * the operations to be processed
+ * @param nb_ops
+ * The number of operations to process
+ * @return
+ * The number of operations actually enqueued on the device. The return
+ * value can be less than the value of the *nb_ops* parameter when the
+ *   comp device's queue is full or if invalid parameters are specified in
+ * a *rte_comp_op*.
+ */
+
+typedef uint16_t (*compressdev_enqueue_pkt_burst_t)(void *qp,
+ struct rte_comp_op **ops, uint16_t nb_ops);
+
+/** The data structure associated with each comp device. */
+struct rte_compressdev {
+ compressdev_dequeue_pkt_burst_t dequeue_burst;
+ /**< Pointer to PMD receive function */
+ compressdev_enqueue_pkt_burst_t enqueue_burst;
+ /**< Pointer to PMD transmit function */
+
+ struct rte_compressdev_data *data;
+ /**< Pointer to device data */
+ struct rte_compressdev_ops *dev_ops;
+ /**< Functions exported by PMD */
+ uint64_t feature_flags;
+ /**< Supported features */
+ struct rte_device *device;
+ /**< Backing device */
+
+ __extension__
+ uint8_t attached : 1;
+ /**< Flag indicating the device is attached */
+} __rte_cache_aligned;
+
+/**
+ *
+ * The data part, with no function pointers, associated with each device.
+ *
+ * This structure is safe to place in shared memory to be common among
+ * different processes in a multi-process configuration.
+ */
+struct rte_compressdev_data {
+ uint8_t dev_id;
+ /**< Compress device identifier */
+ uint8_t socket_id;
+ /**< Socket identifier where memory is allocated */
+ char name[RTE_COMPRESSDEV_NAME_MAX_LEN];
+ /**< Unique identifier name */
+
+ __extension__
+ uint8_t dev_started : 1;
+ /**< Device state: STARTED(1)/STOPPED(0) */
+
+ void **queue_pairs;
+ /**< Array of pointers to queue pairs. */
+ uint16_t nb_queue_pairs;
+ /**< Number of device queue pairs */
+
+ void *dev_private;
+ /**< PMD-specific private data */
+} __rte_cache_aligned;
+#endif
diff --git a/lib/librte_compressdev/rte_compressdev_pmd.c b/lib/librte_compressdev/rte_compressdev_pmd.c
new file mode 100644
index 00000000..7de4f339
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev_pmd.c
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_eal.h>
+
+#include "rte_compressdev_internal.h"
+#include "rte_compressdev_pmd.h"
+
+int compressdev_logtype;
+
+/**
+ * Parse name from argument
+ */
+static int
+rte_compressdev_pmd_parse_name_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ struct rte_compressdev_pmd_init_params *params = extra_args;
+ int n;
+
+ n = snprintf(params->name, RTE_COMPRESSDEV_NAME_MAX_LEN, "%s", value);
+ if (n >= RTE_COMPRESSDEV_NAME_MAX_LEN)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * Parse unsigned integer from argument
+ */
+static int
+rte_compressdev_pmd_parse_uint_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ int i;
+ char *end;
+
+ errno = 0;
+ i = strtol(value, &end, 10);
+ if (*end != 0 || errno != 0 || i < 0)
+ return -EINVAL;
+
+ *((uint32_t *)extra_args) = i;
+ return 0;
+}
+
+int __rte_experimental
+rte_compressdev_pmd_parse_input_args(
+ struct rte_compressdev_pmd_init_params *params,
+ const char *args)
+{
+ struct rte_kvargs *kvlist = NULL;
+ int ret = 0;
+
+ if (params == NULL)
+ return -EINVAL;
+
+ if (args) {
+ kvlist = rte_kvargs_parse(args, compressdev_pmd_valid_params);
+ if (kvlist == NULL)
+ return -EINVAL;
+
+ ret = rte_kvargs_process(kvlist,
+ RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG,
+ &rte_compressdev_pmd_parse_uint_arg,
+ &params->socket_id);
+ if (ret < 0)
+ goto free_kvlist;
+
+ ret = rte_kvargs_process(kvlist,
+ RTE_COMPRESSDEV_PMD_NAME_ARG,
+ &rte_compressdev_pmd_parse_name_arg,
+ params);
+ if (ret < 0)
+ goto free_kvlist;
+ }
+
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_create(const char *name,
+ struct rte_device *device,
+ size_t private_data_size,
+ struct rte_compressdev_pmd_init_params *params)
+{
+ struct rte_compressdev *compressdev;
+
+ if (params->name[0] != '\0') {
+		COMPRESSDEV_LOG(INFO, "[%s] User specified device name = %s",
+ device->driver->name, params->name);
+ name = params->name;
+ }
+
+	COMPRESSDEV_LOG(INFO, "[%s] - Creating compressdev %s",
+ device->driver->name, name);
+
+ COMPRESSDEV_LOG(INFO,
+ "[%s] - Init parameters - name: %s, socket id: %d",
+ device->driver->name, name,
+ params->socket_id);
+
+ /* allocate device structure */
+ compressdev = rte_compressdev_pmd_allocate(name, params->socket_id);
+ if (compressdev == NULL) {
+ COMPRESSDEV_LOG(ERR, "[%s] Failed to allocate comp device %s",
+ device->driver->name, name);
+ return NULL;
+ }
+
+ /* allocate private device structure */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ compressdev->data->dev_private =
+ rte_zmalloc_socket("compressdev device private",
+ private_data_size,
+ RTE_CACHE_LINE_SIZE,
+ params->socket_id);
+
+ if (compressdev->data->dev_private == NULL) {
+ COMPRESSDEV_LOG(ERR,
+ "[%s] Cannot allocate memory for compressdev %s private data",
+ device->driver->name, name);
+
+ rte_compressdev_pmd_release_device(compressdev);
+ return NULL;
+ }
+ }
+
+ compressdev->device = device;
+
+ return compressdev;
+}
+
+int __rte_experimental
+rte_compressdev_pmd_destroy(struct rte_compressdev *compressdev)
+{
+ int retval;
+
+ COMPRESSDEV_LOG(INFO, "[%s] Closing comp device %s",
+ compressdev->device->driver->name,
+ compressdev->device->name);
+
+ /* free comp device */
+ retval = rte_compressdev_pmd_release_device(compressdev);
+ if (retval)
+ return retval;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(compressdev->data->dev_private);
+
+ compressdev->device = NULL;
+ compressdev->data = NULL;
+
+ return 0;
+}
diff --git a/lib/librte_compressdev/rte_compressdev_pmd.h b/lib/librte_compressdev/rte_compressdev_pmd.h
new file mode 100644
index 00000000..38e9ea02
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev_pmd.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef _RTE_COMPRESSDEV_PMD_H_
+#define _RTE_COMPRESSDEV_PMD_H_
+
+/** @file
+ * RTE comp PMD APIs
+ *
+ * @note
+ * These APIs are for comp PMDs only and user applications should not call
+ * them directly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_dev.h>
+#include <rte_common.h>
+
+#include "rte_compressdev.h"
+#include "rte_compressdev_internal.h"
+
+#define RTE_COMPRESSDEV_PMD_NAME_ARG ("name")
+#define RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG ("socket_id")
+
+static const char * const compressdev_pmd_valid_params[] = {
+ RTE_COMPRESSDEV_PMD_NAME_ARG,
+ RTE_COMPRESSDEV_PMD_SOCKET_ID_ARG
+};
+
+/**
+ * @internal
+ * Initialisation parameters for comp devices
+ */
+struct rte_compressdev_pmd_init_params {
+ char name[RTE_COMPRESSDEV_NAME_MAX_LEN];
+ int socket_id;
+};
+
+/** Global structure used for maintaining state of allocated comp devices */
+struct rte_compressdev_global {
+ struct rte_compressdev *devs; /**< Device information array */
+ struct rte_compressdev_data *data[RTE_COMPRESS_MAX_DEVS];
+ /**< Device private data */
+ uint8_t nb_devs; /**< Number of devices found */
+ uint8_t max_devs; /**< Max number of devices */
+};
+
+/** Pointer to global array of comp devices */
+extern struct rte_compressdev *rte_compressdevs;
+/** Pointer to global comp devices data structure */
+extern struct rte_compressdev_global *rte_compressdev_globals;
+
+/**
+ * Get the rte_compressdev structure device pointer for the named device.
+ *
+ * @param name
+ * Compress device name
+ * @return
+ * - The rte_compressdev structure pointer for the given device identifier.
+ */
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_get_named_dev(const char *name);
+
+/**
+ * Definitions of all functions exported by a driver through the
+ * generic structure of type *comp_dev_ops* supplied in the
+ * *rte_compressdev* structure associated with a device.
+ */
+
+/**
+ * Function used to configure device.
+ *
+ * @param dev
+ * Compress device
+ * @param config
+ * Compress device configurations
+ * @return
+ * Returns 0 on success
+ */
+typedef int (*compressdev_configure_t)(struct rte_compressdev *dev,
+ struct rte_compressdev_config *config);
+
+/**
+ * Function used to start a configured device.
+ *
+ * @param dev
+ * Compress device
+ * @return
+ * Returns 0 on success
+ */
+typedef int (*compressdev_start_t)(struct rte_compressdev *dev);
+
+/**
+ * Function used to stop a configured device.
+ *
+ * @param dev
+ * Compress device
+ */
+typedef void (*compressdev_stop_t)(struct rte_compressdev *dev);
+
+/**
+ * Function used to close a configured device.
+ *
+ * @param dev
+ * Compress device
+ * @return
+ * - 0 on success.
+ * - EAGAIN if the device cannot be closed because it is busy
+ */
+typedef int (*compressdev_close_t)(struct rte_compressdev *dev);
+
+
+/**
+ * Function used to get statistics of a device.
+ *
+ * @param dev
+ * Compress device
+ * @param stats
+ * Compress device stats to populate
+ */
+typedef void (*compressdev_stats_get_t)(struct rte_compressdev *dev,
+ struct rte_compressdev_stats *stats);
+
+
+/**
+ * Function used to reset statistics of a device.
+ *
+ * @param dev
+ * Compress device
+ */
+typedef void (*compressdev_stats_reset_t)(struct rte_compressdev *dev);
+
+
+/**
+ * Function used to get specific information of a device.
+ *
+ * @param dev
+ * Compress device
+ */
+typedef void (*compressdev_info_get_t)(struct rte_compressdev *dev,
+ struct rte_compressdev_info *dev_info);
+
+/**
+ * Setup a queue pair for a device.
+ *
+ * @param dev
+ * Compress device
+ * @param qp_id
+ * Queue pair identifier
+ * @param max_inflight_ops
+ * Max inflight ops which qp must accommodate
+ * @param socket_id
+ * Socket identifier
+ * @return
+ * Returns 0 on success.
+ */
+typedef int (*compressdev_queue_pair_setup_t)(struct rte_compressdev *dev,
+ uint16_t qp_id, uint32_t max_inflight_ops, int socket_id);
+
+/**
+ * Release memory resources allocated by given queue pair.
+ *
+ * @param dev
+ * Compress device
+ * @param qp_id
+ * Queue pair identifier
+ * @return
+ * - 0 on success.
+ *  - EAGAIN if the queue pair cannot be released because the device is busy
+ */
+typedef int (*compressdev_queue_pair_release_t)(struct rte_compressdev *dev,
+ uint16_t qp_id);
+
+/**
+ * Get number of available queue pairs of a device.
+ *
+ * @param dev
+ * Compress device
+ * @return
+ * Returns number of queue pairs on success.
+ */
+typedef uint32_t (*compressdev_queue_pair_count_t)(struct rte_compressdev *dev);
+
+/**
+ * Create driver private stream data.
+ *
+ * @param dev
+ * Compressdev device
+ * @param xform
+ * xform data
+ * @param stream
+ * ptr where handle of pmd's private stream data should be stored
+ * @return
+ * - Returns 0 if private stream structure has been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support STATEFUL operations.
+ * - Returns -ENOTSUP if comp device does not support the comp transform.
+ * - Returns -ENOMEM if the private stream could not be allocated.
+ */
+typedef int (*compressdev_stream_create_t)(struct rte_compressdev *dev,
+ const struct rte_comp_xform *xform, void **stream);
+
+/**
+ * Free driver private stream data.
+ *
+ * @param dev
+ * Compressdev device
+ * @param stream
+ * handle of pmd's private stream data
+ * @return
+ * - 0 if successful
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support STATEFUL operations.
+ * - Returns -EBUSY if the stream cannot be freed due to in-flight operations
+ */
+typedef int (*compressdev_stream_free_t)(struct rte_compressdev *dev,
+ void *stream);
+
+/**
+ * Create driver private_xform data.
+ *
+ * @param dev
+ * Compressdev device
+ * @param xform
+ * xform data
+ * @param private_xform
+ * ptr where handle of pmd's private_xform data should be stored
+ * @return
+ * - if successful returns 0
+ * and valid private_xform handle
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if comp device does not support the comp transform.
+ * - Returns -ENOMEM if the private_xform could not be allocated.
+ */
+typedef int (*compressdev_private_xform_create_t)(struct rte_compressdev *dev,
+ const struct rte_comp_xform *xform, void **private_xform);
+
+/**
+ * Free driver private_xform data.
+ *
+ * @param dev
+ * Compressdev device
+ * @param private_xform
+ * handle of pmd's private_xform data
+ * @return
+ * - 0 if successful
+ * - <0 in error cases
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -EBUSY if can't free private_xform due to inflight operations
+ */
+typedef int (*compressdev_private_xform_free_t)(struct rte_compressdev *dev,
+ void *private_xform);
+
+/** comp device operations function pointer table */
+struct rte_compressdev_ops {
+ compressdev_configure_t dev_configure; /**< Configure device. */
+ compressdev_start_t dev_start; /**< Start device. */
+ compressdev_stop_t dev_stop; /**< Stop device. */
+ compressdev_close_t dev_close; /**< Close device. */
+
+ compressdev_info_get_t dev_infos_get; /**< Get device info. */
+
+ compressdev_stats_get_t stats_get;
+ /**< Get device statistics. */
+ compressdev_stats_reset_t stats_reset;
+ /**< Reset device statistics. */
+
+ compressdev_queue_pair_setup_t queue_pair_setup;
+ /**< Set up a device queue pair. */
+ compressdev_queue_pair_release_t queue_pair_release;
+ /**< Release a queue pair. */
+
+ compressdev_stream_create_t stream_create;
+ /**< Create a comp stream and initialise its private data. */
+ compressdev_stream_free_t stream_free;
+ /**< Free a comp stream's private data. */
+
+ compressdev_private_xform_create_t private_xform_create;
+ /**< Create a comp private_xform and initialise its private data. */
+ compressdev_private_xform_free_t private_xform_free;
+ /**< Free a comp private_xform's data. */
+};
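
To make the role of this table concrete, here is a compile-level sketch, not part of the patch, of a hypothetical PMD wiring minimal stubs into rte_compressdev_ops. Callbacks left NULL simply cause the corresponding rte_compressdev_* call to return -ENOTSUP through RTE_FUNC_PTR_OR_ERR_RET; all stub names are invented for this example.

#include <string.h>
#include <rte_compressdev_pmd.h>

static int
stub_configure(struct rte_compressdev *dev, struct rte_compressdev_config *cfg)
{ (void)dev; (void)cfg; return 0; }

static int
stub_start(struct rte_compressdev *dev) { (void)dev; return 0; }

static void
stub_stop(struct rte_compressdev *dev) { (void)dev; }

static int
stub_close(struct rte_compressdev *dev) { (void)dev; return 0; }

static void
stub_info_get(struct rte_compressdev *dev, struct rte_compressdev_info *info)
{ (void)dev; memset(info, 0, sizeof(*info)); }

static struct rte_compressdev_ops stub_pmd_ops = {
	.dev_configure = stub_configure,
	.dev_start = stub_start,
	.dev_stop = stub_stop,
	.dev_close = stub_close,
	.dev_infos_get = stub_info_get,
	/* stats, queue pair, stream and xform callbacks left NULL */
};
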
+
+/**
+ * @internal
+ *
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Allocates a new compressdev slot for a comp device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param name
+ * Unique identifier name for each device
+ * @param socket_id
+ * Socket to allocate resources on
+ * @return
+ *   - Slot in the rte_compressdevs array for a new device;
+ */
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_allocate(const char *name, int socket_id);
+
+/**
+ * @internal
+ *
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Release the specified compressdev device.
+ *
+ * @param dev
+ * Compress device
+ * @return
+ * - 0 on success, negative on error
+ */
+int __rte_experimental
+rte_compressdev_pmd_release_device(struct rte_compressdev *dev);
+
+
+/**
+ * @internal
+ *
+ * PMD assist function to parse initialisation arguments for comp driver
+ * when creating a new comp PMD device instance.
+ *
+ * The PMD should set default values for its parameters before calling this
+ * function; these defaults will be overwritten with any values successfully
+ * parsed from the args string.
+ *
+ * @param params
+ * Parsed PMD initialisation parameters
+ * @param args
+ * Input argument string to parse
+ * @return
+ * - 0 on success
+ * - errno on failure
+ */
+int __rte_experimental
+rte_compressdev_pmd_parse_input_args(
+ struct rte_compressdev_pmd_init_params *params,
+ const char *args);
+
+/**
+ * @internal
+ *
+ * PMD assist function to provide boilerplate code for comp driver to create
+ * and allocate resources for a new comp PMD device instance.
+ *
+ * @param name
+ * Compress device name
+ * @param device
+ * Base device instance
+ * @param params
+ * PMD initialisation parameters
+ * @return
+ * - comp device instance on success
+ * - NULL on creation failure
+ */
+struct rte_compressdev * __rte_experimental
+rte_compressdev_pmd_create(const char *name,
+ struct rte_device *device,
+ size_t private_data_size,
+ struct rte_compressdev_pmd_init_params *params);
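
A hedged sketch, not part of this patch, of how a virtual-device PMD might use the two helpers above in its probe callback. The rte_bus_vdev accessors and RTE_PMD_REGISTER_VDEV come from the EAL vdev bus; struct stub_priv, stub_pmd_ops (the ops table from the earlier sketch) and the driver name are hypothetical.

#include <errno.h>
#include <rte_bus_vdev.h>
#include <rte_lcore.h>
#include <rte_compressdev_pmd.h>

struct stub_priv { int unused; };		/* hypothetical per-device data */
extern struct rte_compressdev_ops stub_pmd_ops;	/* ops table sketched earlier */

static int
stub_comp_probe(struct rte_vdev_device *vdev)
{
	struct rte_compressdev_pmd_init_params init = { "", rte_socket_id() };
	struct rte_compressdev *dev;

	if (rte_compressdev_pmd_parse_input_args(&init,
			rte_vdev_device_args(vdev)) < 0)
		return -EINVAL;

	dev = rte_compressdev_pmd_create(rte_vdev_device_name(vdev),
			&vdev->device, sizeof(struct stub_priv), &init);
	if (dev == NULL)
		return -ENODEV;

	/* a real PMD would also set dev->enqueue_burst / dev->dequeue_burst */
	dev->dev_ops = &stub_pmd_ops;
	return 0;
}

static int
stub_comp_remove(struct rte_vdev_device *vdev)
{
	struct rte_compressdev *dev;

	dev = rte_compressdev_pmd_get_named_dev(rte_vdev_device_name(vdev));
	if (dev == NULL)
		return -ENODEV;

	return rte_compressdev_pmd_destroy(dev);
}

static struct rte_vdev_driver stub_comp_drv = {
	.probe = stub_comp_probe,
	.remove = stub_comp_remove,
};

RTE_PMD_REGISTER_VDEV(compress_stub, stub_comp_drv);
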
+
+/**
+ * @internal
+ *
+ * PMD assist function to provide boilerplate code for comp driver to
+ * destroy and free resources associated with a comp PMD device instance.
+ *
+ * @param dev
+ * Compress device
+ * @return
+ * - 0 on success
+ * - errno on failure
+ */
+int __rte_experimental
+rte_compressdev_pmd_destroy(struct rte_compressdev *dev);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_COMPRESSDEV_PMD_H_ */
diff --git a/lib/librte_compressdev/rte_compressdev_version.map b/lib/librte_compressdev/rte_compressdev_version.map
new file mode 100644
index 00000000..6f900b67
--- /dev/null
+++ b/lib/librte_compressdev/rte_compressdev_version.map
@@ -0,0 +1,39 @@
+EXPERIMENTAL {
+ global:
+
+ rte_compressdev_capability_get;
+ rte_compressdev_close;
+ rte_compressdev_configure;
+ rte_compressdev_count;
+ rte_compressdev_dequeue_burst;
+ rte_compressdev_devices_get;
+ rte_compressdev_enqueue_burst;
+ rte_compressdev_get_dev_id;
+ rte_compressdev_get_feature_name;
+ rte_compressdev_info_get;
+ rte_compressdev_name_get;
+ rte_compressdev_pmd_allocate;
+ rte_compressdev_pmd_create;
+ rte_compressdev_pmd_destroy;
+ rte_compressdev_pmd_get_named_dev;
+ rte_compressdev_pmd_parse_input_args;
+ rte_compressdev_pmd_release_device;
+ rte_compressdev_private_xform_create;
+ rte_compressdev_private_xform_free;
+ rte_compressdev_queue_pair_count;
+ rte_compressdev_queue_pair_setup;
+ rte_compressdev_socket_id;
+ rte_compressdev_start;
+ rte_compressdev_stats_get;
+ rte_compressdev_stats_reset;
+ rte_compressdev_stop;
+ rte_compressdev_stream_create;
+ rte_compressdev_stream_free;
+ rte_comp_get_feature_name;
+ rte_comp_op_alloc;
+ rte_comp_op_bulk_alloc;
+ rte_comp_op_free;
+ rte_comp_op_pool_create;
+
+ local: *;
+};
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index bba8dee9..c1148887 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -23,6 +23,7 @@ SYMLINK-y-include += rte_crypto.h
SYMLINK-y-include += rte_crypto_sym.h
SYMLINK-y-include += rte_cryptodev.h
SYMLINK-y-include += rte_cryptodev_pmd.h
+SYMLINK-y-include += rte_crypto_asym.h
# versioning export map
EXPORT_MAP := rte_cryptodev_version.map
diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build
index 234da323..295f509e 100644
--- a/lib/librte_cryptodev/meson.build
+++ b/lib/librte_cryptodev/meson.build
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 3
+version = 4
sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c')
headers = files('rte_cryptodev.h',
'rte_cryptodev_pmd.h',
'rte_crypto.h',
- 'rte_crypto_sym.h')
+ 'rte_crypto_sym.h',
+ 'rte_crypto_asym.h')
deps += ['kvargs', 'mbuf']
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 95cf8615..fd5ef3a8 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -23,6 +23,7 @@ extern "C" {
#include <rte_common.h>
#include "rte_crypto_sym.h"
+#include "rte_crypto_asym.h"
/** Crypto operation types */
enum rte_crypto_op_type {
@@ -30,6 +31,8 @@ enum rte_crypto_op_type {
/**< Undefined operation type */
RTE_CRYPTO_OP_TYPE_SYMMETRIC,
/**< Symmetric operation */
+ RTE_CRYPTO_OP_TYPE_ASYMMETRIC
+ /**< Asymmetric operation */
};
/** Status of crypto operation */
@@ -73,20 +76,37 @@ enum rte_crypto_op_sess_type {
* rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() .
*/
struct rte_crypto_op {
- uint8_t type;
- /**< operation type */
- uint8_t status;
- /**<
- * operation status - this is reset to
- * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and
- * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
- * is successfully processed by a crypto PMD
- */
- uint8_t sess_type;
- /**< operation session type */
-
- uint8_t reserved[5];
- /**< Reserved bytes to fill 64 bits for future additions */
+ __extension__
+ union {
+ uint64_t raw;
+ __extension__
+ struct {
+ uint8_t type;
+ /**< operation type */
+ uint8_t status;
+ /**<
+ * operation status - this is reset to
+ * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation
+ * from mempool and will be set to
+ * RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
+ * is successfully processed by a crypto PMD
+ */
+ uint8_t sess_type;
+ /**< operation session type */
+ uint8_t reserved[3];
+ /**< Reserved bytes to fill 64 bits for
+ * future additions
+ */
+ uint16_t private_data_offset;
+ /**< Offset to indicate start of private data (if any).
+ * The offset is counted from the start of the
+ * rte_crypto_op including IV.
+ * The private data may be used by the application
+ * to store information which should remain untouched
+ * in the library/driver
+ */
+ };
+ };
struct rte_mempool *mempool;
/**< crypto operation mempool which operation is allocated from */
@@ -97,6 +117,10 @@ struct rte_crypto_op {
union {
struct rte_crypto_sym_op sym[0];
/**< Symmetric operation parameters */
+
+ struct rte_crypto_asym_op asym[0];
+ /**< Asymmetric operation parameters */
+
}; /**< operation specific parameters */
};
@@ -117,6 +141,9 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
__rte_crypto_sym_op_reset(op->sym);
break;
+ case RTE_CRYPTO_OP_TYPE_ASYMMETRIC:
+ memset(op->asym, 0, sizeof(struct rte_crypto_asym_op));
+ break;
case RTE_CRYPTO_OP_TYPE_UNDEFINED:
default:
break;
@@ -283,9 +310,14 @@ __rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size)
if (likely(op->mempool != NULL)) {
priv_size = __rte_crypto_op_get_priv_data_size(op->mempool);
- if (likely(priv_size >= size))
- return (void *)((uint8_t *)(op + 1) +
+ if (likely(priv_size >= size)) {
+ if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
sizeof(struct rte_crypto_sym_op));
+ if (op->type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
+ sizeof(struct rte_crypto_asym_op));
+ }
}
return NULL;
@@ -388,6 +420,24 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op,
return __rte_crypto_sym_op_attach_sym_session(op->sym, sess);
}
+/**
+ * Attach an asymmetric session to a crypto operation
+ *
+ * @param op crypto operation, must be of type asymmetric
+ * @param sess cryptodev session
+ */
+static inline int
+rte_crypto_op_attach_asym_session(struct rte_crypto_op *op,
+ struct rte_cryptodev_asym_session *sess)
+{
+ if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_ASYMMETRIC))
+ return -1;
+
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+ op->asym->session = sess;
+ return 0;
+}
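
A small usage sketch, not part of the patch: allocating an asymmetric crypto op from a mempool created with rte_crypto_op_pool_create() and attaching a previously initialised asymmetric session to it.

#include <rte_crypto.h>

static struct rte_crypto_op *
alloc_asym_op(struct rte_mempool *op_pool,
	      struct rte_cryptodev_asym_session *sess)
{
	struct rte_crypto_op *op;

	op = rte_crypto_op_alloc(op_pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
	if (op == NULL)
		return NULL;

	if (rte_crypto_op_attach_asym_session(op, sess) != 0) {
		rte_crypto_op_free(op);
		return NULL;
	}

	return op;
}
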
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_cryptodev/rte_crypto_asym.h b/lib/librte_cryptodev/rte_crypto_asym.h
new file mode 100644
index 00000000..5e185b2d
--- /dev/null
+++ b/lib/librte_cryptodev/rte_crypto_asym.h
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium Networks
+ */
+
+#ifndef _RTE_CRYPTO_ASYM_H_
+#define _RTE_CRYPTO_ASYM_H_
+
+/**
+ * @file rte_crypto_asym.h
+ *
+ * RTE Definitions for Asymmetric Cryptography
+ *
+ * Defines asymmetric algorithms and modes, as well as supported
+ * asymmetric crypto operations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <stdint.h>
+
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+
+typedef struct rte_crypto_param_t {
+ uint8_t *data;
+ /**< pointer to buffer holding data */
+ rte_iova_t iova;
+ /**< IO address of data buffer */
+ size_t length;
+ /**< length of data in bytes */
+} rte_crypto_param;
+
+/** asym xform type name strings */
+extern const char *
+rte_crypto_asym_xform_strings[];
+
+/** asym operations type name strings */
+extern const char *
+rte_crypto_asym_op_strings[];
+
+/**
+ * Asymmetric crypto transformation types.
+ * Each xform type maps to one asymmetric algorithm
+ * performing specific operation
+ *
+ */
+enum rte_crypto_asym_xform_type {
+ RTE_CRYPTO_ASYM_XFORM_UNSPECIFIED = 0,
+ /**< Invalid xform. */
+ RTE_CRYPTO_ASYM_XFORM_NONE,
+ /**< Xform type None.
+	 * May be supported by a PMD to provide a
+	 * passthrough op for debugging purposes.
+	 * If xform_type is none, op_type is disregarded.
+ */
+ RTE_CRYPTO_ASYM_XFORM_RSA,
+ /**< RSA. Performs Encrypt, Decrypt, Sign and Verify.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DH,
+ /**< Diffie-Hellman.
+ * Performs Key Generate and Shared Secret Compute.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DSA,
+ /**< Digital Signature Algorithm
+ * Performs Signature Generation and Verification.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODINV,
+ /**< Modular Inverse
+	 * Perform modular inverse b^(-1) mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODEX,
+ /**< Modular Exponentiation
+ * Perform Modular Exponentiation b^e mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END
+ /**< End of list */
+};
+
+/**
+ * Asymmetric crypto operation type variants
+ */
+enum rte_crypto_asym_op_type {
+ RTE_CRYPTO_ASYM_OP_ENCRYPT,
+ /**< Asymmetric Encrypt operation */
+ RTE_CRYPTO_ASYM_OP_DECRYPT,
+ /**< Asymmetric Decrypt operation */
+ RTE_CRYPTO_ASYM_OP_SIGN,
+ /**< Signature Generation operation */
+ RTE_CRYPTO_ASYM_OP_VERIFY,
+ /**< Signature Verification operation */
+ RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
+ /**< DH Private Key generation operation */
+ RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
+ /**< DH Public Key generation operation */
+ RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
+ /**< DH Shared Secret compute operation */
+ RTE_CRYPTO_ASYM_OP_LIST_END
+};
+
+/**
+ * Padding types for RSA signature.
+ */
+enum rte_crypto_rsa_padding_type {
+ RTE_CRYPTO_RSA_PADDING_NONE = 0,
+ /**< RSA no padding scheme */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT0,
+ /**< RSA PKCS#1 V1.5 Block Type 0 padding scheme
+	 * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT1,
+ /**< RSA PKCS#1 V1.5 Block Type 01 padding scheme
+	 * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT2,
+ /**< RSA PKCS#1 V1.5 Block Type 02 padding scheme
+	 * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PADDING_OAEP,
+ /**< RSA PKCS#1 OAEP padding scheme */
+ RTE_CRYPTO_RSA_PADDING_PSS,
+ /**< RSA PKCS#1 PSS padding scheme */
+ RTE_CRYPTO_RSA_PADDING_TYPE_LIST_END
+};
+
+/**
+ * RSA private key type enumeration
+ *
+ * Enumerates the private key formats required to perform the RSA crypto
+ * transform.
+ *
+ */
+enum rte_crypto_rsa_priv_key_type {
+ RTE_RSA_KEY_TYPE_EXP,
+ /**< RSA private key is an exponent */
+ RTE_RSA_KET_TYPE_QT,
+ /**< RSA private key is in quintuple format
+ * See rte_crypto_rsa_priv_key_qt
+ */
+};
+
+/**
+ * Structure describing RSA private key in quintuple format.
+ * See PKCS V1.5 RSA Cryptography Standard.
+ */
+struct rte_crypto_rsa_priv_key_qt {
+ rte_crypto_param p;
+ /**< p - Private key component P
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param q;
+ /**< q - Private key component Q
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param dP;
+ /**< dP - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dP = d mod ( p - 1 )
+ */
+
+ rte_crypto_param dQ;
+ /**< dQ - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dQ = d mod ( q - 1 )
+ */
+
+ rte_crypto_param qInv;
+ /**< qInv - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * qInv = inv q mod p
+ */
+};
+
+/**
+ * Asymmetric RSA transform data
+ *
+ * Structure describing RSA xform params
+ *
+ */
+struct rte_crypto_rsa_xform {
+ rte_crypto_param n;
+	/**< n - Modulus
+	 * Modulus data of RSA operation in Octet-string network
+	 * byte order format.
+ */
+
+ rte_crypto_param e;
+ /**< e - Public key exponent
+ * Public key exponent used for RSA public key operations in Octet-
+ * string network byte order format.
+ */
+
+ enum rte_crypto_rsa_priv_key_type key_type;
+
+ __extension__
+ union {
+ rte_crypto_param d;
+ /**< d - Private key exponent
+ * Private key exponent used for RSA
+ * private key operations in
+ * Octet-string network byte order format.
+ */
+
+ struct rte_crypto_rsa_priv_key_qt qt;
+ /**< qt - Private key in quintuple format */
+ };
+};
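
For illustration (not part of this patch), filling the RSA xform for an exponent-format private key might look like the sketch below; the key byte arrays are placeholders and the iova fields are left zero, which may not be acceptable to every PMD.

#include <string.h>
#include <rte_crypto_asym.h>

/* Placeholder big-endian key material; real keys come from the application. */
static uint8_t rsa_n[256];
static uint8_t rsa_e[3] = { 0x01, 0x00, 0x01 };
static uint8_t rsa_d[256];

static void
fill_rsa_xform(struct rte_crypto_asym_xform *xform)
{
	memset(xform, 0, sizeof(*xform));
	xform->next = NULL;
	xform->xform_type = RTE_CRYPTO_ASYM_XFORM_RSA;

	xform->rsa.n.data = rsa_n;
	xform->rsa.n.length = sizeof(rsa_n);
	xform->rsa.e.data = rsa_e;
	xform->rsa.e.length = sizeof(rsa_e);

	xform->rsa.key_type = RTE_RSA_KEY_TYPE_EXP;
	xform->rsa.d.data = rsa_d;
	xform->rsa.d.length = sizeof(rsa_d);
}
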
+
+/**
+ * Asymmetric Modular exponentiation transform data
+ *
+ * Structure describing modular exponentiation xform params
+ *
+ */
+struct rte_crypto_modex_xform {
+ rte_crypto_param modulus;
+ /**< modulus
+ * Prime modulus of the modexp transform operation in octet-string
+ * network byte order format.
+ */
+
+ rte_crypto_param exponent;
+ /**< exponent
+ * Private exponent of the modexp transform operation in
+ * octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric modular inverse transform operation
+ *
+ * Structure describing modular inverse xform params
+ *
+ */
+struct rte_crypto_modinv_xform {
+ rte_crypto_param modulus;
+ /**<
+ * Pointer to the prime modulus data for modular
+ * inverse operation in octet-string network byte
+ * order format.
+ */
+};
+
+/**
+ * Asymmetric DH transform data
+ *
+ * Structure describing Diffie-Hellman xform params
+ *
+ */
+struct rte_crypto_dh_xform {
+ enum rte_crypto_asym_op_type type;
+ /**< Setup xform for key generate or shared secret compute */
+
+ rte_crypto_param p;
+ /**< p : Prime modulus data
+	 * DH prime modulus data in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param g;
+ /**< g : Generator
+ * DH group generator data in octet-string network byte order
+ * format.
+ *
+ */
+};
+
+/**
+ * Asymmetric Digital Signature transform operation
+ *
+ * Structure describing DSA xform params
+ *
+ */
+struct rte_crypto_dsa_xform {
+ rte_crypto_param p;
+ /**< p - Prime modulus
+ * Prime modulus data for DSA operation in Octet-string network byte
+ * order format.
+ */
+ rte_crypto_param q;
+ /**< q : Order of the subgroup.
+ * Order of the subgroup data in Octet-string network byte order
+ * format.
+ * (p-1) % q = 0
+ */
+ rte_crypto_param g;
+ /**< g: Generator of the subgroup
+ * Generator data in Octet-string network byte order format.
+ */
+ rte_crypto_param x;
+ /**< x: Private key of the signer in octet-string network
+ * byte order format.
+ * Used when app has pre-defined private key.
+ * Valid only when xform chain is DSA ONLY.
+ * if xform chain is DH private key generate + DSA, then DSA sign
+ * compute will use internally generated key.
+ */
+};
+
+/**
+ * Operations params for modular operations:
+ * exponentiation and invert
+ *
+ */
+struct rte_crypto_mod_op_param {
+ rte_crypto_param base;
+ /**<
+ * Pointer to base of modular exponentiation/inversion data in
+ * Octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric crypto transform data
+ *
+ * Structure describing asym xforms.
+ */
+struct rte_crypto_asym_xform {
+ struct rte_crypto_asym_xform *next;
+ /**< Pointer to next xform to set up xform chain.*/
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Asymmetric crypto transform */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_xform rsa;
+ /**< RSA xform parameters */
+
+ struct rte_crypto_modex_xform modex;
+ /**< Modular Exponentiation xform parameters */
+
+ struct rte_crypto_modinv_xform modinv;
+ /**< Modulus Inverse xform parameters */
+
+ struct rte_crypto_dh_xform dh;
+ /**< DH xform parameters */
+
+ struct rte_crypto_dsa_xform dsa;
+ /**< DSA xform parameters */
+ };
+};
+
+struct rte_cryptodev_asym_session;
+
+/**
+ * RSA operation params
+ *
+ */
+struct rte_crypto_rsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+	/**< Type of RSA operation for transform */
+
+ rte_crypto_param message;
+ /**<
+ * Pointer to data
+ * - to be encrypted for RSA public encrypt.
+ * - to be decrypted for RSA private decrypt.
+ * - to be signed for RSA sign generation.
+ * - to be authenticated for RSA sign verification.
+ */
+
+ rte_crypto_param sign;
+ /**<
+ * Pointer to RSA signature data. If operation is RSA
+ * sign @ref RTE_CRYPTO_ASYM_OP_SIGN, buffer will be
+ * over-written with generated signature.
+ *
+ * Length of the signature data will be equal to the
+	 * RSA modulus length.
+ */
+
+ enum rte_crypto_rsa_padding_type pad;
+ /**< RSA padding scheme to be used for transform */
+
+ enum rte_crypto_auth_algorithm md;
+ /**< Hash algorithm to be used for data hash if padding
+ * scheme is either OAEP or PSS. Valid hash algorithms
+ * are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+
+ enum rte_crypto_auth_algorithm mgf1md;
+ /**<
+ * Hash algorithm to be used for mask generation if
+ * padding scheme is either OAEP or PSS. If padding
+	 * scheme is unspecified, the data hash algorithm is used
+ * for mask generation. Valid hash algorithms are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+};
+
+/**
+ * Diffie-Hellman Operations params.
+ */
+struct rte_crypto_dh_op_param {
+ rte_crypto_param pub_key;
+ /**<
+ * Output generated public key when xform type is
+ * DH PUB_KEY_GENERATION.
+ * Input peer public key when xform type is DH
+	 * SHARED_SECRET_COMPUTATION.
+ * pub_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param priv_key;
+ /**<
+ * Output generated private key if xform type is
+ * DH PRIVATE_KEY_GENERATION
+ * Input when xform type is DH SHARED_SECRET_COMPUTATION.
+ * priv_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param shared_secret;
+ /**<
+ * Output with calculated shared secret
+ * when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+	 * shared_secret is in octet-string network byte order format.
+ *
+ */
+};
+
+/**
+ * DSA Operations params
+ *
+ */
+struct rte_crypto_dsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+ /**< Signature Generation or Verification */
+ rte_crypto_param message;
+ /**< input message to be signed or verified */
+ rte_crypto_param r;
+ /**< dsa sign component 'r' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param s;
+ /**< dsa sign component 's' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param y;
+ /**< y : Public key of the signer.
+ * Public key data of the signer in Octet-string network byte order
+ * format.
+ * y = g^x mod p
+ */
+};
+
+/**
+ * Asymmetric Cryptographic Operation.
+ *
+ * Structure describing asymmetric crypto operation params.
+ *
+ */
+struct rte_crypto_asym_op {
+ struct rte_cryptodev_asym_session *session;
+ /**< Handle for the initialised session context */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_op_param rsa;
+ struct rte_crypto_mod_op_param modex;
+ struct rte_crypto_mod_op_param modinv;
+ struct rte_crypto_dh_op_param dh;
+ struct rte_crypto_dsa_op_param dsa;
+ };
+} __rte_cache_aligned;
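
A companion sketch to the RSA xform example above, again not part of the patch: selecting the rsa member of this union for a PKCS#1 v1.5 signature operation. The message and sign parameters are assumed to have been filled by the caller, and the sign buffer must be as long as the RSA modulus.

#include <rte_crypto_asym.h>

static void
prepare_rsa_sign(struct rte_crypto_asym_op *asym_op,
		 rte_crypto_param message, rte_crypto_param sign)
{
	asym_op->rsa.op_type = RTE_CRYPTO_ASYM_OP_SIGN;
	asym_op->rsa.message = message;	/* data to be signed */
	asym_op->rsa.sign = sign;	/* overwritten with the signature */
	asym_op->rsa.pad = RTE_CRYPTO_RSA_PKCS1_V1_5_BT1;
}
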
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTO_ASYM_H_ */
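A minimal sketch of how the new asymmetric op layout can be populated for an RSA signature, assuming an op pool created with RTE_CRYPTO_OP_TYPE_ASYMMETRIC; the rte_crypto_param data/length fields and the RTE_CRYPTO_RSA_PADDING_PKCS1_5 enumerator are taken from this release, while asym_op_pool and the buffers are hypothetical:

	uint8_t msg[64];                /* hypothetical message to sign */
	uint8_t sig[256];               /* room for a 2048-bit signature */
	struct rte_crypto_op *op =
		rte_crypto_op_alloc(asym_op_pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);

	if (op != NULL) {
		struct rte_crypto_asym_op *asym_op = op->asym;

		asym_op->rsa.op_type = RTE_CRYPTO_ASYM_OP_SIGN;
		asym_op->rsa.pad = RTE_CRYPTO_RSA_PADDING_PKCS1_5;
		asym_op->rsa.message.data = msg;
		asym_op->rsa.message.length = sizeof(msg);
		asym_op->rsa.sign.data = sig;           /* overwritten with the signature */
		asym_op->rsa.sign.length = sizeof(sig); /* equals the RSA modulus length */
	}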
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index 60797e9c..eb5afc5e 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -245,6 +245,23 @@ enum rte_crypto_auth_algorithm {
RTE_CRYPTO_AUTH_ZUC_EIA3,
/**< ZUC algorithm in EIA3 mode */
+ RTE_CRYPTO_AUTH_SHA3_224,
+ /**< 224 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_224_HMAC,
+ /**< HMAC using 224 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_256,
+ /**< 256 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_256_HMAC,
+ /**< HMAC using 256 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_384,
+ /**< 384 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_384_HMAC,
+ /**< HMAC using 384 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_512,
+ /**< 512 bit SHA3 algorithm. */
+ RTE_CRYPTO_AUTH_SHA3_512_HMAC,
+ /**< HMAC using 512 bit SHA3 algorithm. */
+
RTE_CRYPTO_AUTH_LIST_END
};
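For reference, selecting one of the new SHA3 digests in a symmetric auth transform is a one-field change; a sketch, assuming the usual rte_crypto_sym_xform auth fields:

	struct rte_crypto_sym_xform auth_xform = {
		.type = RTE_CRYPTO_SYM_XFORM_AUTH,
		.next = NULL,
		.auth = {
			.op = RTE_CRYPTO_AUTH_OP_GENERATE,
			.algo = RTE_CRYPTO_AUTH_SHA3_256,
			.digest_length = 32,    /* SHA3-256 produces a 32-byte digest */
		},
	};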
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index 8745b6b0..63ae23f0 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -166,6 +166,31 @@ rte_crypto_aead_operation_strings[] = {
[RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt"
};
+/**
+ * Asymmetric crypto transform operation string identifiers.
+ */
+const char *rte_crypto_asym_xform_strings[] = {
+ [RTE_CRYPTO_ASYM_XFORM_NONE] = "none",
+ [RTE_CRYPTO_ASYM_XFORM_RSA] = "rsa",
+ [RTE_CRYPTO_ASYM_XFORM_MODEX] = "modexp",
+ [RTE_CRYPTO_ASYM_XFORM_MODINV] = "modinv",
+ [RTE_CRYPTO_ASYM_XFORM_DH] = "dh",
+ [RTE_CRYPTO_ASYM_XFORM_DSA] = "dsa",
+};
+
+/**
+ * Asymmetric crypto operation string identifiers.
+ */
+const char *rte_crypto_asym_op_strings[] = {
+ [RTE_CRYPTO_ASYM_OP_ENCRYPT] = "encrypt",
+ [RTE_CRYPTO_ASYM_OP_DECRYPT] = "decrypt",
+ [RTE_CRYPTO_ASYM_OP_SIGN] = "sign",
+ [RTE_CRYPTO_ASYM_OP_VERIFY] = "verify",
+ [RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE] = "priv_key_generate",
+ [RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE] = "pub_key_generate",
+ [RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE] = "sharedsecret_compute",
+};
+
int
rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum,
const char *algo_string)
@@ -217,6 +242,24 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
return -1;
}
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string)
+{
+ unsigned int i;
+
+ for (i = 1; i < RTE_DIM(rte_crypto_asym_xform_strings); i++) {
+ if (strcmp(xform_string,
+ rte_crypto_asym_xform_strings[i]) == 0) {
+ *xform_enum = (enum rte_crypto_asym_xform_type) i;
+ return 0;
+ }
+ }
+
+ /* Invalid string */
+ return -1;
+}
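The lookup mirrors the existing cipher/auth/aead helpers; a short sketch of mapping a user-supplied string onto the xform enum (printf and its include are assumed):

	enum rte_crypto_asym_xform_type xform_type;

	if (rte_cryptodev_asym_get_xform_enum(&xform_type, "rsa") == 0)
		printf("parsed xform: %s\n",
			rte_crypto_asym_xform_strings[xform_type]);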
+
/**
* The crypto auth operation strings identifiers.
* It could be used in application command line.
@@ -262,19 +305,62 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
}
-#define param_range_check(x, y) \
- (((x < y.min) || (x > y.max)) || \
- (y.increment != 0 && (x % y.increment) != 0))
+static int
+param_range_check(uint16_t size, const struct rte_crypto_param_range *range)
+{
+ unsigned int next_size;
+
+ /* Check lower/upper bounds */
+ if (size < range->min)
+ return -1;
+
+ if (size > range->max)
+ return -1;
+
+ /* If range is actually only one value, size is correct */
+ if (range->increment == 0)
+ return 0;
+
+ /* Check if value is one of the supported sizes */
+ for (next_size = range->min; next_size <= range->max;
+ next_size += range->increment)
+ if (size == next_size)
+ return 0;
+
+ return -1;
+}
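Unlike the old macro, the helper walks the advertised range in increment steps, so e.g. a {min=16, max=32, increment=8} key range accepts 16, 24 and 32 bytes only. A sketch of the public caller, with AES-CBC and the 24-byte key / 16-byte IV chosen purely for illustration and dev_id assumed valid:

	const struct rte_cryptodev_sym_capability_idx cap_idx = {
		.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
		.algo.cipher = RTE_CRYPTO_CIPHER_AES_CBC,
	};
	const struct rte_cryptodev_symmetric_capability *cap =
		rte_cryptodev_sym_capability_get(dev_id, &cap_idx);

	if (cap != NULL &&
	    rte_cryptodev_sym_capability_check_cipher(cap, 24, 16) == 0) {
		/* 24-byte key and 16-byte IV lie on the advertised size grid */
	}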
+
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx)
+{
+ const struct rte_cryptodev_capabilities *capability;
+ struct rte_cryptodev_info dev_info;
+ unsigned int i = 0;
+
+ memset(&dev_info, 0, sizeof(struct rte_cryptodev_info));
+ rte_cryptodev_info_get(dev_id, &dev_info);
+
+ while ((capability = &dev_info.capabilities[i++])->op !=
+ RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ if (capability->op != RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ continue;
+
+ if (capability->asym.xform_capa.xform_type == idx->type)
+ return &capability->asym.xform_capa;
+ }
+ return NULL;
+};
int
rte_cryptodev_sym_capability_check_cipher(
const struct rte_cryptodev_symmetric_capability *capability,
uint16_t key_size, uint16_t iv_size)
{
- if (param_range_check(key_size, capability->cipher.key_size))
+ if (param_range_check(key_size, &capability->cipher.key_size) != 0)
return -1;
- if (param_range_check(iv_size, capability->cipher.iv_size))
+ if (param_range_check(iv_size, &capability->cipher.iv_size) != 0)
return -1;
return 0;
@@ -285,13 +371,13 @@ rte_cryptodev_sym_capability_check_auth(
const struct rte_cryptodev_symmetric_capability *capability,
uint16_t key_size, uint16_t digest_size, uint16_t iv_size)
{
- if (param_range_check(key_size, capability->auth.key_size))
+ if (param_range_check(key_size, &capability->auth.key_size) != 0)
return -1;
- if (param_range_check(digest_size, capability->auth.digest_size))
+ if (param_range_check(digest_size, &capability->auth.digest_size) != 0)
return -1;
- if (param_range_check(iv_size, capability->auth.iv_size))
+ if (param_range_check(iv_size, &capability->auth.iv_size) != 0)
return -1;
return 0;
@@ -303,20 +389,56 @@ rte_cryptodev_sym_capability_check_aead(
uint16_t key_size, uint16_t digest_size, uint16_t aad_size,
uint16_t iv_size)
{
- if (param_range_check(key_size, capability->aead.key_size))
+ if (param_range_check(key_size, &capability->aead.key_size) != 0)
return -1;
- if (param_range_check(digest_size, capability->aead.digest_size))
+ if (param_range_check(digest_size, &capability->aead.digest_size) != 0)
return -1;
- if (param_range_check(aad_size, capability->aead.aad_size))
+ if (param_range_check(aad_size, &capability->aead.aad_size) != 0)
return -1;
- if (param_range_check(iv_size, capability->aead.iv_size))
+ if (param_range_check(iv_size, &capability->aead.iv_size) != 0)
return -1;
return 0;
}
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type)
+{
+ if (capability->op_types & (1 << op_type))
+ return 1;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen)
+{
+ /* No need to check against limits if min or max is 0 */
+ if (capability->modlen.min != 0) {
+ if (modlen < capability->modlen.min)
+ return -1;
+ }
+
+ if (capability->modlen.max != 0) {
+ if (modlen > capability->modlen.max)
+ return -1;
+ }
+
+ /* In any case, check that the given modlen is a multiple of the increment */
+ if (capability->modlen.increment != 0) {
+ if (modlen % (capability->modlen.increment))
+ return -1;
+ }
+
+ return 0;
+}
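Taken together, the new helpers let an application qualify a device before building an asymmetric session; a sketch for RSA sign with a 2048-bit (256-byte) modulus, dev_id assumed valid:

	const struct rte_cryptodev_asym_capability_idx idx = {
		.type = RTE_CRYPTO_ASYM_XFORM_RSA,
	};
	const struct rte_cryptodev_asymmetric_xform_capability *asym_cap =
		rte_cryptodev_asym_capability_get(dev_id, &idx);

	if (asym_cap != NULL &&
	    rte_cryptodev_asym_xform_capability_check_optype(asym_cap,
			RTE_CRYPTO_ASYM_OP_SIGN) == 1 &&
	    rte_cryptodev_asym_xform_capability_check_modlen(asym_cap, 256) == 0) {
		/* device can sign with a 2048-bit RSA modulus */
	}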
+
const char *
rte_cryptodev_get_feature_name(uint64_t flag)
@@ -340,12 +462,22 @@ rte_cryptodev_get_feature_name(uint64_t flag)
return "CPU_AESNI";
case RTE_CRYPTODEV_FF_HW_ACCELERATED:
return "HW_ACCELERATED";
- case RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER:
- return "MBUF_SCATTER_GATHER";
+ case RTE_CRYPTODEV_FF_IN_PLACE_SGL:
+ return "IN_PLACE_SGL";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT:
+ return "OOP_LB_IN_LB_OUT";
case RTE_CRYPTODEV_FF_CPU_NEON:
return "CPU_NEON";
case RTE_CRYPTODEV_FF_CPU_ARM_CE:
return "CPU_ARM_CE";
+ case RTE_CRYPTODEV_FF_SECURITY:
+ return "SECURITY_PROTOCOL";
default:
return NULL;
}
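With the renamed flags, a capability dump simply probes each bit of the feature mask; a sketch, with dev_id assumed valid and printf's include assumed:

	struct rte_cryptodev_info info;
	uint64_t flag;

	rte_cryptodev_info_get(dev_id, &info);
	for (flag = 1; flag != 0; flag <<= 1) {
		const char *name = rte_cryptodev_get_feature_name(flag);

		if ((info.feature_flags & flag) && name != NULL)
			printf("  %s\n", name);
	}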
@@ -680,50 +812,6 @@ rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs,
}
int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_start(dev, queue_pair_id);
-
-}
-
-int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_stop(dev, queue_pair_id);
-
-}
-
-int
rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
{
struct rte_cryptodev *dev;
@@ -943,6 +1031,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
dev_info->driver_name = dev->device->driver->name;
+ dev_info->device = dev->device;
}
@@ -1075,8 +1164,46 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
index = dev->driver_id;
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_configure, -ENOTSUP);
+
if (sess->sess_private_data[index] == NULL) {
- ret = dev->dev_ops->session_configure(dev, xforms, sess, mp);
+ ret = dev->dev_ops->sym_session_configure(dev, xforms,
+ sess, mp);
+ if (ret < 0) {
+ CDEV_LOG_ERR(
+ "dev_id %d failed to configure session details",
+ dev_id);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mp)
+{
+ struct rte_cryptodev *dev;
+ uint8_t index;
+ int ret;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (sess == NULL || xforms == NULL || dev == NULL)
+ return -EINVAL;
+
+ index = dev->driver_id;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_configure,
+ -ENOTSUP);
+
+ if (sess->sess_private_data[index] == NULL) {
+ ret = dev->dev_ops->asym_session_configure(dev,
+ xforms,
+ sess, mp);
if (ret < 0) {
CDEV_LOG_ERR(
"dev_id %d failed to configure session details",
@@ -1099,69 +1226,54 @@ rte_cryptodev_sym_session_create(struct rte_mempool *mp)
return NULL;
}
- /* Clear device session pointer */
- memset(sess, 0, (sizeof(void *) * nb_drivers));
+ /* Clear device session pointer.
+ * Include the flag indicating presence of user data
+ */
+ memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
return sess;
}
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *sess)
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mp)
{
- struct rte_cryptodev *dev;
+ struct rte_cryptodev_asym_session *sess;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
+ /* Allocate a session structure from the session pool */
+ if (rte_mempool_get(mp, (void **)&sess)) {
+ CDEV_LOG_ERR("couldn't get object from session mempool");
+ return NULL;
}
- dev = &rte_crypto_devices[dev_id];
-
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0);
-
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
-
- if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session",
- dev_id, qp_id);
- return -EPERM;
- }
+ /* Clear device session pointer.
+ * Include the flag indicating presence of private data
+ */
+ memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
- return 0;
+ return sess;
}
int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
+rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0);
+ if (dev == NULL || sess == NULL)
+ return -EINVAL;
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_clear, -ENOTSUP);
- if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session",
- dev_id, qp_id);
- return -EPERM;
- }
+ dev->dev_ops->sym_session_clear(dev, sess);
return 0;
}
-int
-rte_cryptodev_sym_session_clear(uint8_t dev_id,
- struct rte_cryptodev_sym_session *sess)
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess)
{
struct rte_cryptodev *dev;
@@ -1170,7 +1282,9 @@ rte_cryptodev_sym_session_clear(uint8_t dev_id,
if (dev == NULL || sess == NULL)
return -EINVAL;
- dev->dev_ops->session_clear(dev, sess);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_clear, -ENOTSUP);
+
+ dev->dev_ops->asym_session_clear(dev, sess);
return 0;
}
@@ -1187,7 +1301,7 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
/* Check that all device private data has been freed */
for (i = 0; i < nb_drivers; i++) {
- sess_priv = get_session_private_data(sess, i);
+ sess_priv = get_sym_session_private_data(sess, i);
if (sess_priv != NULL)
return -EBUSY;
}
@@ -1199,18 +1313,55 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
return 0;
}
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess)
+{
+ uint8_t i;
+ void *sess_priv;
+ struct rte_mempool *sess_mp;
+
+ if (sess == NULL)
+ return -EINVAL;
+
+ /* Check that all device private data has been freed */
+ for (i = 0; i < nb_drivers; i++) {
+ sess_priv = get_asym_session_private_data(sess, i);
+ if (sess_priv != NULL)
+ return -EBUSY;
+ }
+
+ /* Return session to mempool */
+ sess_mp = rte_mempool_from_obj(sess);
+ rte_mempool_put(sess_mp, sess);
+
+ return 0;
+}
+
+
unsigned int
-rte_cryptodev_get_header_session_size(void)
+rte_cryptodev_sym_get_header_session_size(void)
{
/*
* Header contains pointers to the private data
- * of all registered drivers
+ * of all registered drivers, and a flag which
+ * indicates presence of user data
*/
- return (sizeof(void *) * nb_drivers);
+ return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
+}
+
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void)
+{
+ /*
+ * Header contains pointers to the private data
+ * of all registered drivers, and a flag which
+ * indicates presence of private data
+ */
+ return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
}
unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id)
+rte_cryptodev_sym_get_private_session_size(uint8_t dev_id)
{
struct rte_cryptodev *dev;
unsigned int header_size = sizeof(void *) * nb_drivers;
@@ -1221,10 +1372,10 @@ rte_cryptodev_get_private_session_size(uint8_t dev_id)
dev = rte_cryptodev_pmd_get_dev(dev_id);
- if (*dev->dev_ops->session_get_size == NULL)
+ if (*dev->dev_ops->sym_session_get_size == NULL)
return 0;
- priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
+ priv_sess_size = (*dev->dev_ops->sym_session_get_size)(dev);
/*
* If size is less than session header size,
@@ -1238,6 +1389,61 @@ rte_cryptodev_get_private_session_size(uint8_t dev_id)
}
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id)
+{
+ struct rte_cryptodev *dev;
+ unsigned int header_size = sizeof(void *) * nb_drivers;
+ unsigned int priv_sess_size;
+
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id))
+ return 0;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (*dev->dev_ops->asym_session_get_size == NULL)
+ return 0;
+
+ priv_sess_size = (*dev->dev_ops->asym_session_get_size)(dev);
+ if (priv_sess_size < header_size)
+ return header_size;
+
+ return priv_sess_size;
+
+}
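Since asymmetric sessions are plain mempool objects, the element size must cover the driver-agnostic header plus the largest per-device private data; a sizing sketch, with the pool name and element count purely illustrative:

	unsigned int elt_size =
		rte_cryptodev_asym_get_header_session_size() +
		rte_cryptodev_asym_get_private_session_size(dev_id);

	struct rte_mempool *asym_sess_pool = rte_mempool_create(
		"asym_sess_pool", 128, elt_size, 0, 0,
		NULL, NULL, NULL, NULL, rte_socket_id(), 0);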
+
+int __rte_experimental
+rte_cryptodev_sym_session_set_user_data(
+ struct rte_cryptodev_sym_session *sess,
+ void *data,
+ uint16_t size)
+{
+ uint16_t off_set = sizeof(void *) * nb_drivers;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
+
+ if (sess == NULL)
+ return -EINVAL;
+
+ *user_data_present = 1;
+ off_set += sizeof(uint8_t);
+ rte_memcpy((uint8_t *)sess + off_set, data, size);
+ return 0;
+}
+
+void * __rte_experimental
+rte_cryptodev_sym_session_get_user_data(
+ struct rte_cryptodev_sym_session *sess)
+{
+ uint16_t off_set = sizeof(void *) * nb_drivers;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
+
+ if (sess == NULL || !*user_data_present)
+ return NULL;
+
+ off_set += sizeof(uint8_t);
+ return (uint8_t *)sess + off_set;
+}
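The user data is stored immediately after the per-driver pointers and the presence flag, so the session mempool element must have been sized with room for it; a sketch with a hypothetical per-flow struct and an already created session sess:

	struct my_flow_ctx { uint32_t flow_id; } ctx = { .flow_id = 7 };

	if (rte_cryptodev_sym_session_set_user_data(sess, &ctx,
			sizeof(ctx)) == 0) {
		struct my_flow_ctx *stored =
			rte_cryptodev_sym_session_get_user_data(sess);
		/* stored->flow_id == 7, the returned pointer aliases the session */
	}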
+
/** Initialise rte_crypto_op mempool element */
static void
rte_crypto_op_init(struct rte_mempool *mempool,
@@ -1265,9 +1471,17 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
struct rte_crypto_op_pool_private *priv;
unsigned elt_size = sizeof(struct rte_crypto_op) +
- sizeof(struct rte_crypto_sym_op) +
priv_size;
+ if (type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_sym_op);
+ } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_asym_op);
+ } else {
+ CDEV_LOG_ERR("Invalid op_type\n");
+ return NULL;
+ }
+
/* lookup mempool in case already allocated */
struct rte_mempool *mp = rte_mempool_lookup(name);
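Op pools are now typed, so asymmetric ops get their own pool sized with the asym element size; a creation sketch with illustrative name and counts:

	struct rte_mempool *asym_op_pool = rte_crypto_op_pool_create(
		"asym_op_pool", RTE_CRYPTO_OP_TYPE_ASYMMETRIC,
		1024,	/* number of ops in the pool */
		128,	/* per-lcore cache size */
		0,	/* no extra private area per op */
		rte_socket_id());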
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index c8fa6893..4099823f 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_H_
@@ -65,7 +38,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
RTE_FMT_TAIL(__VA_ARGS__,)))
-#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
#define CDEV_LOG_DEBUG(...) \
RTE_LOG(DEBUG, CRYPTODEV, \
RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
@@ -76,13 +48,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#else
-#define CDEV_LOG_DEBUG(...) (void)0
-#define CDEV_PMD_TRACE(...) (void)0
-#endif
-
-
-
/**
* A macro that points to an offset from the start
* of the crypto operation structure (rte_crypto_op)
@@ -178,6 +143,35 @@ struct rte_cryptodev_symmetric_capability {
};
};
+/**
+ * Asymmetric Xform Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_xform_capability {
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Transform type: RSA/MODEXP/DH/DSA/MODINV */
+
+ uint32_t op_types;
+ /**< bitmask for supported rte_crypto_asym_op_type */
+
+ __extension__
+ union {
+ struct rte_crypto_param_range modlen;
+ /**< Range of modulus length supported by modulus based xform.
+ * Value 0 means implementation default
+ */
+ };
+};
+
+/**
+ * Asymmetric Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_capability {
+ struct rte_cryptodev_asymmetric_xform_capability xform_capa;
+};
+
+
/** Structure used to capture a capability of a crypto device */
struct rte_cryptodev_capabilities {
enum rte_crypto_op_type op;
@@ -187,6 +181,8 @@ struct rte_cryptodev_capabilities {
union {
struct rte_cryptodev_symmetric_capability sym;
/**< Symmetric operation capability parameters */
+ struct rte_cryptodev_asymmetric_capability asym;
+ /**< Asymmetric operation capability parameters */
};
};
@@ -201,7 +197,17 @@ struct rte_cryptodev_sym_capability_idx {
};
/**
- * Provide capabilities available for defined device and algorithm
+ * Structure used to describe asymmetric crypto xforms.
+ * Each xform maps to one asym algorithm.
+ *
+ */
+struct rte_cryptodev_asym_capability_idx {
+ enum rte_crypto_asym_xform_type type;
+ /**< Asymmetric xform (algo) type */
+};
+
+/**
+ * Provide capabilities available for defined device and algorithm
*
* @param dev_id The identifier of the device.
* @param idx Description of crypto algorithms.
@@ -215,6 +221,20 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
const struct rte_cryptodev_sym_capability_idx *idx);
/**
+ * Provide capabilities available for defined device and xform
+ *
+ * @param dev_id The identifier of the device.
+ * @param idx Description of asym crypto xform.
+ *
+ * @return
+ *   - Return description of the asymmetric crypto capability if it exists.
+ *   - Return NULL if the capability does not exist.
+ */
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx);
+
+/**
* Check if key size and initial vector are supported
* in crypto cipher capability
*
@@ -270,6 +290,36 @@ rte_cryptodev_sym_capability_check_aead(
uint16_t iv_size);
/**
+ * Check if op type is supported
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param op_type op type
+ *
+ * @return
+ * - Return 1 if the op type is supported
+ * - Return 0 if unsupported
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type);
+
+/**
+ * Check if modulus length is in supported range
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param modlen modulus length.
+ *
+ * @return
+ * - Return 0 if the parameters are in range of the capability.
+ * - Return -1 if the parameters are out of range of the capability.
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen);
+
+/**
* Provide the cipher algorithm enum, given an algorithm string
*
* @param algo_enum A pointer to the cipher algorithm
@@ -314,6 +364,22 @@ int
rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
const char *algo_string);
+/**
+ * Provide the Asymmetric xform enum, given an xform string
+ *
+ * @param xform_enum A pointer to the xform type
+ * enum to be filled
+ * @param xform_string xform string
+ *
+ * @return
+ * - Return -1 if string is not valid
+ * - Return 0 if the string is valid
+ */
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string);
+
+
/** Macro used at end of crypto PMD list */
#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \
{ RTE_CRYPTO_OP_TYPE_UNDEFINED }
@@ -327,31 +393,50 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
*
* Keep these flags synchronised with rte_cryptodev_get_feature_name()
*/
-#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
+#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
/**< Symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
+#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
/**< Asymmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
+#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
/**< Chaining symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
+#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
/**< Utilises CPU SIMD SSE instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
+#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
/**< Utilises CPU SIMD AVX instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
+#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
/**< Utilises CPU SIMD AVX2 instructions */
-#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
+#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
/**< Utilises CPU AES-NI instructions */
-#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
-/**< Operations are off-loaded to an external hardware accelerator */
-#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
+#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
+/**< Operations are off-loaded to an
+ * external hardware accelerator
+ */
+#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
/**< Utilises CPU SIMD AVX512 instructions */
-#define RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER (1ULL << 9)
-/**< Scatter-gather mbufs are supported */
-#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 10)
+#define RTE_CRYPTODEV_FF_IN_PLACE_SGL (1ULL << 9)
+/**< In-place Scatter-gather (SGL) buffers, with multiple segments,
+ * are supported
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT (1ULL << 10)
+/**< Out-of-place Scatter-gather (SGL) buffers are
+ * supported in input and output
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT (1ULL << 11)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment in output
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT (1ULL << 12)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT (1ULL << 13)
+/**< Out-of-place linear buffers (LB) are supported in input and output */
+#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 14)
/**< Utilises CPU NEON instructions */
-#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11)
+#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 15)
/**< Utilises ARM CPU Cryptographic Extensions */
-#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12)
+#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 16)
/**< Support Security Protocol Processing */
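Applications test the split SGL/LB flags the same way as before, by masking dev_info.feature_flags obtained from rte_cryptodev_info_get(); for example:

	if (dev_info.feature_flags & RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT) {
		/* multi-segment input with a single linear output buffer is OK */
	}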
@@ -369,11 +454,12 @@ rte_cryptodev_get_feature_name(uint64_t flag);
/** Crypto device information */
struct rte_cryptodev_info {
- const char *driver_name; /**< Driver name. */
- uint8_t driver_id; /**< Driver identifier */
- struct rte_pci_device *pci_dev; /**< PCI information. */
+ const char *driver_name; /**< Driver name. */
+ uint8_t driver_id; /**< Driver identifier */
+ struct rte_device *device; /**< Generic device information. */
- uint64_t feature_flags; /**< Feature flags */
+ uint64_t feature_flags;
+ /**< Feature flags exposes HW/SW features for the given device */
const struct rte_cryptodev_capabilities *capabilities;
/**< Array of devices supported capabilities */
@@ -381,12 +467,17 @@ struct rte_cryptodev_info {
unsigned max_nb_queue_pairs;
/**< Maximum number of queues pairs supported by device. */
+ uint16_t min_mbuf_headroom_req;
+ /**< Minimum mbuf headroom required by device */
+
+ uint16_t min_mbuf_tailroom_req;
+ /**< Minimum mbuf tailroom required by device */
+
struct {
unsigned max_nb_sessions;
- /**< Maximum number of sessions supported by device. */
- unsigned int max_nb_sessions_per_qp;
- /**< Maximum number of sessions per queue pair.
- * Default 0 for infinite sessions
+ /**< Maximum number of sessions supported by device.
+ * If 0, the device does not have any limitation in
+ * number of sessions that can be used.
*/
} sym;
};
@@ -602,39 +693,6 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
struct rte_mempool *session_pool);
/**
- * Start a specified queue pair of a device. It is used
- * when deferred_start flag of the specified queue is true.
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to start. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_crypto_dev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-extern int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
- * Stop specified queue pair of a device
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to stop. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_cryptodev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-extern int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
* Get the number of queue pairs on a specific crypto device
*
* @param dev_id Crypto device identifier.
@@ -749,7 +807,7 @@ struct rte_cryptodev {
struct rte_cryptodev_ops *dev_ops;
/**< Functions exported by PMD */
uint64_t feature_flags;
- /**< Supported features */
+ /**< Feature flags exposes HW/SW features for the given device */
struct rte_device *device;
/**< Backing device */
@@ -897,9 +955,14 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
*/
struct rte_cryptodev_sym_session {
__extension__ void *sess_private_data[0];
- /**< Private session material */
+ /**< Private symmetric session material */
};
+/** Cryptodev asymmetric crypto session */
+struct rte_cryptodev_asym_session {
+ __extension__ void *sess_private_data[0];
+ /**< Private asymmetric session material */
+};
/**
* Create symmetric crypto session header (generic with no private data)
@@ -914,6 +977,18 @@ struct rte_cryptodev_sym_session *
rte_cryptodev_sym_session_create(struct rte_mempool *mempool);
/**
+ * Create asymmetric crypto session header (generic with no private data)
+ *
+ * @param mempool mempool to allocate asymmetric session
+ * objects from
+ * @return
+ *  - On success, returns a pointer to the asym-session.
+ *  - On failure, returns NULL.
+ */
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mempool);
+
+/**
* Frees symmetric crypto session header, after checking that all
* the device private data has been freed, returning it
* to its original mempool.
@@ -929,6 +1004,21 @@ int
rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
/**
+ * Frees asymmetric crypto session header, after checking that all
+ * the device private data has been freed, returning it
+ * to its original mempool.
+ *
+ * @param sess Session header to be freed.
+ *
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if session is NULL.
+ * - -EBUSY if not all device private data has been freed.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess);
+
+/**
* Fill out private data for the device id, based on its device type.
*
* @param dev_id ID of device that we want the session to be used on
@@ -940,7 +1030,8 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
* @return
* - On success, zero.
* - -EINVAL if input parameters are invalid.
- * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOTSUP if crypto device does not support the crypto transform or
+ * does not support symmetric operations.
* - -ENOMEM if the private session could not be allocated.
*/
int
@@ -950,8 +1041,31 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
struct rte_mempool *mempool);
/**
+ * Initialize asymmetric session on a device with specific asymmetric xform
+ *
+ * @param dev_id ID of device that we want the session to be used on
+ * @param sess Session to be set up on a device
+ * @param xforms Asymmetric crypto transform operations to apply on flow
+ * processed with this session
+ * @param mempool Mempool to be used for internal allocation.
+ *
+ * @return
+ * - On success, zero.
+ * - -EINVAL if input parameters are invalid.
+ * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOMEM if the private session could not be allocated.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mempool);
+
+/**
* Frees private data for the device id, based on its device type,
- * returning it to its mempool.
+ * returning it to its mempool. It is the application's responsibility
+ * to ensure that private session data is not cleared while there are
+ * still in-flight operations using it.
*
* @param dev_id ID of device that uses the session.
* @param sess Session containing the reference to the private data
@@ -959,63 +1073,70 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
* @return
* - 0 if successful.
* - -EINVAL if device is invalid or session is NULL.
+ * - -ENOTSUP if crypto device does not support symmetric operations.
*/
int
rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess);
/**
+ * Frees resources held by asymmetric session during rte_cryptodev_asym_session_init
+ *
+ * @param dev_id ID of device that uses the asymmetric session.
+ * @param sess Asymmetric session setup on device using
+ *				rte_cryptodev_asym_session_init
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if device is invalid or session is NULL.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess);
+
+/**
* Get the size of the header session, for all registered drivers.
*
* @return
- * Size of the header session.
+ *   Size of the symmetric header session.
*/
unsigned int
-rte_cryptodev_get_header_session_size(void);
+rte_cryptodev_sym_get_header_session_size(void);
/**
- * Get the size of the private session data for a device.
- *
- * @param dev_id The device identifier.
+ * Get the size of the asymmetric session header, for all registered drivers.
*
* @return
- * - Size of the private data, if successful
- * - 0 if device is invalid or does not have private session
+ * Size of the asymmetric header session.
*/
-unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void);
/**
- * Attach queue pair with sym session.
+ * Get the size of the private symmetric session data
+ * for a device.
*
- * @param dev_id Device to which the session will be attached.
- * @param qp_id Queue pair to which the session will be attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
+ * @param dev_id The device identifier.
*
* @return
- * - On success, zero.
- * - On failure, a negative value.
+ * - Size of the private data, if successful
+ * - 0 if device is invalid or does not have private
+ * symmetric session
*/
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
+unsigned int
+rte_cryptodev_sym_get_private_session_size(uint8_t dev_id);
/**
- * Detach queue pair with sym session.
+ * Get the size of the private data for asymmetric session
+ * on device
*
- * @param dev_id Device to which the session is attached.
- * @param qp_id Queue pair to which the session is attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
+ * @param dev_id The device identifier.
*
* @return
- * - On success, zero.
- * - On failure, a negative value.
+ * - Size of the asymmetric private data, if successful
+ * - 0 if device is invalid or does not have private session
*/
-int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id);
/**
* Provide driver identifier.
@@ -1037,6 +1158,38 @@ int rte_cryptodev_driver_id_get(const char *name);
*/
const char *rte_cryptodev_driver_name_get(uint8_t driver_id);
+/**
+ * Store user data in a session.
+ *
+ * @param sess Session pointer allocated by
+ * *rte_cryptodev_sym_session_create*.
+ * @param data Pointer to the user data.
+ * @param size Size of the user data.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_cryptodev_sym_session_set_user_data(
+ struct rte_cryptodev_sym_session *sess,
+ void *data,
+ uint16_t size);
+
+/**
+ * Get user data stored in a session.
+ *
+ * @param sess Session pointer allocated by
+ * *rte_cryptodev_sym_session_create*.
+ *
+ * @return
+ * - On success return pointer to user data.
+ * - On failure returns NULL.
+ */
+void * __rte_experimental
+rte_cryptodev_sym_session_get_user_data(
+ struct rte_cryptodev_sym_session *sess);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c
index f2aac24b..2088ac3f 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.c
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c
@@ -66,13 +66,6 @@ rte_cryptodev_pmd_parse_input_args(
goto free_kvlist;
ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
- &rte_cryptodev_pmd_parse_uint_arg,
- &params->max_nb_sessions);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG,
&rte_cryptodev_pmd_parse_uint_arg,
&params->socket_id);
@@ -109,10 +102,9 @@ rte_cryptodev_pmd_create(const char *name,
device->driver->name, name);
CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s,"
- "socket id: %d, max queue pairs: %u, max sessions: %u",
+ "socket id: %d, max queue pairs: %u",
device->driver->name, name,
- params->socket_id, params->max_nb_queue_pairs,
- params->max_nb_sessions);
+ params->socket_id, params->max_nb_queue_pairs);
/* allocate device structure */
cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id);
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 089848e0..6ff49d64 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2016 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_PMD_H_
@@ -59,18 +32,15 @@ extern "C" {
#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8
-#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048
#define RTE_CRYPTODEV_PMD_NAME_ARG ("name")
#define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions")
#define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id")
static const char * const cryptodev_pmd_valid_params[] = {
RTE_CRYPTODEV_PMD_NAME_ARG,
RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG
};
@@ -83,7 +53,6 @@ struct rte_cryptodev_pmd_init_params {
size_t private_data_size;
int socket_id;
unsigned int max_nb_queue_pairs;
- unsigned int max_nb_sessions;
};
/** Global structure used for maintaining state of allocated crypto devices */
@@ -216,28 +185,6 @@ typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_info *dev_info);
/**
- * Start queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
- * Stop queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
* Setup a queue pair for a device.
*
* @param dev Crypto device pointer
@@ -302,6 +249,17 @@ typedef int (*cryptodev_sym_create_session_pool_t)(
*/
typedef unsigned (*cryptodev_sym_get_session_private_size_t)(
struct rte_cryptodev *dev);
+/**
+ * Get the size of an asymmetric cryptodev session
+ *
+ * @param dev Crypto device pointer
+ *
+ * @return
+ * - On success returns the size of the session structure for device
+ * - On failure returns 0
+ */
+typedef unsigned int (*cryptodev_asym_get_session_private_size_t)(
+ struct rte_cryptodev *dev);
/**
* Configure a Crypto session on a device.
@@ -321,7 +279,24 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
struct rte_crypto_sym_xform *xform,
struct rte_cryptodev_sym_session *session,
struct rte_mempool *mp);
-
+/**
+ * Configure a Crypto asymmetric session on a device.
+ *
+ * @param dev Crypto device pointer
+ * @param xform Single or chain of crypto xforms
+ * @param priv_sess Pointer to cryptodev's private session structure
+ * @param mp Mempool where the private session is allocated
+ *
+ * @return
+ *  - Returns 0 if private session structure has been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ * - Returns -ENOMEM if the private session could not be allocated.
+ */
+typedef int (*cryptodev_asym_configure_session_t)(struct rte_cryptodev *dev,
+ struct rte_crypto_asym_xform *xform,
+ struct rte_cryptodev_asym_session *session,
+ struct rte_mempool *mp);
/**
* Free driver private session data.
*
@@ -330,32 +305,14 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
*/
typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_sym_session *sess);
-
-/**
- * Optional API for drivers to attach sessions with queue pair.
- * @param dev Crypto device pointer
- * @param qp_id queue pair id for attaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
- */
-typedef int (*cryptodev_sym_queue_pair_attach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
-
/**
- * Optional API for drivers to detach sessions from queue pair.
+ * Free asymmetric session private data.
+ *
* @param dev Crypto device pointer
- * @param qp_id queue pair id for detaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
+ * @param sess Cryptodev session structure
*/
-typedef int (*cryptodev_sym_queue_pair_detach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
+typedef void (*cryptodev_asym_free_session_t)(struct rte_cryptodev *dev,
+ struct rte_cryptodev_asym_session *sess);
/** Crypto device operations function pointer table */
struct rte_cryptodev_ops {
@@ -375,23 +332,21 @@ struct rte_cryptodev_ops {
/**< Set up a device queue pair. */
cryptodev_queue_pair_release_t queue_pair_release;
/**< Release a queue pair. */
- cryptodev_queue_pair_start_t queue_pair_start;
- /**< Start a queue pair. */
- cryptodev_queue_pair_stop_t queue_pair_stop;
- /**< Stop a queue pair. */
cryptodev_queue_pair_count_t queue_pair_count;
/**< Get count of the queue pairs. */
- cryptodev_sym_get_session_private_size_t session_get_size;
+ cryptodev_sym_get_session_private_size_t sym_session_get_size;
/**< Return private session. */
- cryptodev_sym_configure_session_t session_configure;
+ cryptodev_asym_get_session_private_size_t asym_session_get_size;
+ /**< Return asym session private size. */
+ cryptodev_sym_configure_session_t sym_session_configure;
/**< Configure a Crypto session. */
- cryptodev_sym_free_session_t session_clear;
+ cryptodev_asym_configure_session_t asym_session_configure;
+ /**< Configure asymmetric Crypto session. */
+ cryptodev_sym_free_session_t sym_session_clear;
+ /**< Clear a Crypto sessions private data. */
+ cryptodev_asym_free_session_t asym_session_clear;
/**< Clear a Crypto sessions private data. */
- cryptodev_sym_queue_pair_attach_session_t qp_attach_session;
- /**< Attach session to queue pair. */
- cryptodev_sym_queue_pair_detach_session_t qp_detach_session;
- /**< Detach session from queue pair. */
};
@@ -516,20 +471,32 @@ uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv,
#define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\
-RTE_INIT(init_ ##driver_id);\
-static void init_ ##driver_id(void)\
+RTE_INIT(init_ ##driver_id)\
{\
- driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv).driver);\
+ driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv));\
+}
+
+static inline void *
+get_sym_session_private_data(const struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id) {
+ return sess->sess_private_data[driver_id];
+}
+
+static inline void
+set_sym_session_private_data(struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id, void *private_data)
+{
+ sess->sess_private_data[driver_id] = private_data;
}
static inline void *
-get_session_private_data(const struct rte_cryptodev_sym_session *sess,
+get_asym_session_private_data(const struct rte_cryptodev_asym_session *sess,
uint8_t driver_id) {
return sess->sess_private_data[driver_id];
}
static inline void
-set_session_private_data(struct rte_cryptodev_sym_session *sess,
+set_asym_session_private_data(struct rte_cryptodev_asym_session *sess,
uint8_t driver_id, void *private_data)
{
sess->sess_private_data[driver_id] = private_data;
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index eb47308b..7ca00735 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -22,8 +22,6 @@ DPDK_16.04 {
rte_cryptodev_stop;
rte_cryptodev_queue_pair_count;
rte_cryptodev_queue_pair_setup;
- rte_cryptodev_queue_pair_start;
- rte_cryptodev_queue_pair_stop;
rte_crypto_op_pool_create;
local: *;
@@ -52,8 +50,6 @@ DPDK_17.05 {
rte_cryptodev_get_auth_algo_enum;
rte_cryptodev_get_cipher_algo_enum;
- rte_cryptodev_queue_pair_attach_sym_session;
- rte_cryptodev_queue_pair_detach_sym_session;
} DPDK_17.02;
@@ -65,8 +61,6 @@ DPDK_17.08 {
rte_cryptodev_driver_id_get;
rte_cryptodev_driver_name_get;
rte_cryptodev_get_aead_algo_enum;
- rte_cryptodev_get_header_session_size;
- rte_cryptodev_get_private_session_size;
rte_cryptodev_sym_capability_check_aead;
rte_cryptodev_sym_session_init;
rte_cryptodev_sym_session_clear;
@@ -85,3 +79,30 @@ DPDK_17.11 {
rte_cryptodev_pmd_parse_input_args;
} DPDK_17.08;
+
+DPDK_18.05 {
+ global:
+
+ rte_cryptodev_sym_get_header_session_size;
+ rte_cryptodev_sym_get_private_session_size;
+
+} DPDK_17.11;
+
+EXPERIMENTAL {
+ global:
+
+ rte_cryptodev_asym_capability_get;
+ rte_cryptodev_asym_get_header_session_size;
+ rte_cryptodev_asym_get_private_session_size;
+ rte_cryptodev_asym_get_xform_enum;
+ rte_cryptodev_asym_session_clear;
+ rte_cryptodev_asym_session_create;
+ rte_cryptodev_asym_session_free;
+ rte_cryptodev_asym_session_init;
+ rte_cryptodev_asym_xform_capability_check_modlen;
+ rte_cryptodev_asym_xform_capability_check_optype;
+ rte_cryptodev_sym_session_get_user_data;
+ rte_cryptodev_sym_session_set_user_data;
+ rte_crypto_asym_op_strings;
+ rte_crypto_asym_xform_strings;
+};
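The symbols in the EXPERIMENTAL block are only usable when the caller opts in, normally via CFLAGS (-DALLOW_EXPERIMENTAL_API); defining the macro before the first DPDK include, as sketched below, has the same effect:

	#define ALLOW_EXPERIMENTAL_API 1
	#include <rte_cryptodev.h>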
diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile
index 9d8e2477..5b06b216 100644
--- a/lib/librte_eal/bsdapp/Makefile
+++ b/lib/librte_eal/bsdapp/Makefile
@@ -4,7 +4,5 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal
-DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem
-DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
deleted file mode 100644
index 33ce83ee..00000000
--- a/lib/librte_eal/bsdapp/contigmem/BSDmakefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-#
-
-KMOD= contigmem
-SRCS= contigmem.c device_if.h bus_if.h
-
-.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile
deleted file mode 100644
index 428a7ede..00000000
--- a/lib/librte_eal/bsdapp/contigmem/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = contigmem
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR)
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -Winline -Wall -Werror
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := contigmem.c
-
-include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
deleted file mode 100644
index 1715b5dc..00000000
--- a/lib/librte_eal/bsdapp/contigmem/contigmem.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/bio.h>
-#include <sys/bus.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <sys/vmmeter.h>
-
-#include <machine/bus.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_phys.h>
-
-struct contigmem_buffer {
- void *addr;
- int refcnt;
- struct mtx mtx;
-};
-
-struct contigmem_vm_handle {
- int buffer_index;
-};
-
-static int contigmem_load(void);
-static int contigmem_unload(void);
-static int contigmem_physaddr(SYSCTL_HANDLER_ARGS);
-
-static d_mmap_single_t contigmem_mmap_single;
-static d_open_t contigmem_open;
-static d_close_t contigmem_close;
-
-static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
-static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
-
-static eventhandler_tag contigmem_eh_tag;
-static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
-static struct cdev *contigmem_cdev = NULL;
-static int contigmem_refcnt;
-
-TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
-TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
-
-static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
-
-SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
- &contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
-SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
- &contigmem_buffer_size, 0, "Size of each contiguous buffer");
-SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD,
- &contigmem_refcnt, 0, "Number of references to contigmem");
-
-static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
- "physaddr");
-
-MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
-
-static int contigmem_modevent(module_t mod, int type, void *arg)
-{
- int error = 0;
-
- switch (type) {
- case MOD_LOAD:
- error = contigmem_load();
- break;
- case MOD_UNLOAD:
- error = contigmem_unload();
- break;
- default:
- break;
- }
-
- return error;
-}
-
-moduledata_t contigmem_mod = {
- "contigmem",
- (modeventhand_t)contigmem_modevent,
- 0
-};
-
-DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
-MODULE_VERSION(contigmem, 1);
-
-static struct cdevsw contigmem_ops = {
- .d_name = "contigmem",
- .d_version = D_VERSION,
- .d_flags = D_TRACKCLOSE,
- .d_mmap_single = contigmem_mmap_single,
- .d_open = contigmem_open,
- .d_close = contigmem_close,
-};
-
-static int
-contigmem_load()
-{
- char index_string[8], description[32];
- int i, error = 0;
- void *addr;
-
- if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
- printf("%d buffers requested is greater than %d allowed\n",
- contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
- error = EINVAL;
- goto error;
- }
-
- if (contigmem_buffer_size < PAGE_SIZE ||
- (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
- printf("buffer size 0x%lx is not greater than PAGE_SIZE and "
- "power of two\n", contigmem_buffer_size);
- error = EINVAL;
- goto error;
- }
-
- for (i = 0; i < contigmem_num_buffers; i++) {
- addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
- 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
- if (addr == NULL) {
- printf("contigmalloc failed for buffer %d\n", i);
- error = ENOMEM;
- goto error;
- }
-
- printf("%2u: virt=%p phys=%p\n", i, addr,
- (void *)pmap_kextract((vm_offset_t)addr));
-
- mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
- contigmem_buffers[i].addr = addr;
- contigmem_buffers[i].refcnt = 0;
-
- snprintf(index_string, sizeof(index_string), "%d", i);
- snprintf(description, sizeof(description),
- "phys addr for buffer %d", i);
- SYSCTL_ADD_PROC(NULL,
- &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
- index_string, CTLTYPE_U64 | CTLFLAG_RD,
- (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
- description);
- }
-
- contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
- GID_WHEEL, 0600, "contigmem");
-
- return 0;
-
-error:
- for (i = 0; i < contigmem_num_buffers; i++) {
- if (contigmem_buffers[i].addr != NULL)
- contigfree(contigmem_buffers[i].addr,
- contigmem_buffer_size, M_CONTIGMEM);
- if (mtx_initialized(&contigmem_buffers[i].mtx))
- mtx_destroy(&contigmem_buffers[i].mtx);
- }
-
- return error;
-}
-
-static int
-contigmem_unload()
-{
- int i;
-
- if (contigmem_refcnt > 0)
- return EBUSY;
-
- if (contigmem_cdev != NULL)
- destroy_dev(contigmem_cdev);
-
- if (contigmem_eh_tag != NULL)
- EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
-
- for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) {
- if (contigmem_buffers[i].addr != NULL)
- contigfree(contigmem_buffers[i].addr,
- contigmem_buffer_size, M_CONTIGMEM);
- if (mtx_initialized(&contigmem_buffers[i].mtx))
- mtx_destroy(&contigmem_buffers[i].mtx);
- }
-
- return 0;
-}
-
-static int
-contigmem_physaddr(SYSCTL_HANDLER_ARGS)
-{
- uint64_t physaddr;
- int index = (int)(uintptr_t)arg1;
-
- physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr);
- return sysctl_handle_64(oidp, &physaddr, 0, req);
-}
-
-static int
-contigmem_open(struct cdev *cdev, int fflags, int devtype,
- struct thread *td)
-{
-
- atomic_add_int(&contigmem_refcnt, 1);
-
- return 0;
-}
-
-static int
-contigmem_close(struct cdev *cdev, int fflags, int devtype,
- struct thread *td)
-{
-
- atomic_subtract_int(&contigmem_refcnt, 1);
-
- return 0;
-}
-
-static int
-contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t foff, struct ucred *cred, u_short *color)
-{
- struct contigmem_vm_handle *vmh = handle;
- struct contigmem_buffer *buf;
-
- buf = &contigmem_buffers[vmh->buffer_index];
-
- atomic_add_int(&contigmem_refcnt, 1);
-
- mtx_lock(&buf->mtx);
- if (buf->refcnt == 0)
- memset(buf->addr, 0, contigmem_buffer_size);
- buf->refcnt++;
- mtx_unlock(&buf->mtx);
-
- return 0;
-}
-
-static void
-contigmem_cdev_pager_dtor(void *handle)
-{
- struct contigmem_vm_handle *vmh = handle;
- struct contigmem_buffer *buf;
-
- buf = &contigmem_buffers[vmh->buffer_index];
-
- mtx_lock(&buf->mtx);
- buf->refcnt--;
- mtx_unlock(&buf->mtx);
-
- free(vmh, M_CONTIGMEM);
-
- atomic_subtract_int(&contigmem_refcnt, 1);
-}
-
-static int
-contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
- vm_page_t *mres)
-{
- vm_paddr_t paddr;
- vm_page_t m_paddr, page;
- vm_memattr_t memattr, memattr1;
-
- memattr = object->memattr;
-
- VM_OBJECT_WUNLOCK(object);
-
- paddr = offset;
-
- m_paddr = vm_phys_paddr_to_vm_page(paddr);
- if (m_paddr != NULL) {
- memattr1 = pmap_page_get_memattr(m_paddr);
- if (memattr1 != memattr)
- memattr = memattr1;
- }
-
- if (((*mres)->flags & PG_FICTITIOUS) != 0) {
- /*
- * If the passed in result page is a fake page, update it with
- * the new physical address.
- */
- page = *mres;
- VM_OBJECT_WLOCK(object);
- vm_page_updatefake(page, paddr, memattr);
- } else {
- vm_page_t mret;
- /*
- * Replace the passed in reqpage page with our own fake page and
- * free up the original page.
- */
- page = vm_page_getfake(paddr, memattr);
- VM_OBJECT_WLOCK(object);
- mret = vm_page_replace(page, object, (*mres)->pindex);
- KASSERT(mret == *mres,
- ("invalid page replacement, old=%p, ret=%p", *mres, mret));
- vm_page_lock(mret);
- vm_page_free(mret);
- vm_page_unlock(mret);
- *mres = page;
- }
-
- page->valid = VM_PAGE_BITS_ALL;
-
- return VM_PAGER_OK;
-}
-
-static struct cdev_pager_ops contigmem_cdev_pager_ops = {
- .cdev_pg_ctor = contigmem_cdev_pager_ctor,
- .cdev_pg_dtor = contigmem_cdev_pager_dtor,
- .cdev_pg_fault = contigmem_cdev_pager_fault,
-};
-
-static int
-contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
- struct vm_object **obj, int nprot)
-{
- struct contigmem_vm_handle *vmh;
- uint64_t buffer_index;
-
- /*
- * The buffer index is encoded in the offset. Divide the offset by
- * PAGE_SIZE to get the index of the buffer requested by the user
- * app.
- */
- buffer_index = *offset / PAGE_SIZE;
- if (buffer_index >= contigmem_num_buffers)
- return EINVAL;
-
- if (size > contigmem_buffer_size)
- return EINVAL;
-
- vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO);
- if (vmh == NULL)
- return ENOMEM;
- vmh->buffer_index = buffer_index;
-
- *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
- *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops,
- size, nprot, *offset, curthread->td_ucred);
-
- return 0;
-}
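For reference, the contigmem_mmap_single() handler above expects the buffer index to be encoded in the mmap offset (offset / PAGE_SIZE selects the buffer). A minimal, hypothetical user-space caller could therefore look like the sketch below; the /dev/contigmem node name matches what contigmem_load() creates, but the sketch itself is illustrative and not part of this patch.

/* Hypothetical sketch: map contigmem buffer 'index' from user space by
 * encoding the index in the mmap() offset, as the handler above expects.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void *
map_contigmem_buffer(unsigned int index, size_t buffer_size)
{
	int fd = open("/dev/contigmem", O_RDWR);
	void *va;

	if (fd < 0)
		return NULL;

	/* the driver divides this offset by PAGE_SIZE to recover the index */
	va = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, (off_t)index * getpagesize());
	close(fd); /* the mapping stays valid after closing the fd */

	return va == MAP_FAILED ? NULL : va;
}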
diff --git a/lib/librte_eal/bsdapp/contigmem/meson.build b/lib/librte_eal/bsdapp/contigmem/meson.build
deleted file mode 100644
index 8fb2ab78..00000000
--- a/lib/librte_eal/bsdapp/contigmem/meson.build
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
-
-sources = files('contigmem.c')
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index dd455e67..d27da3d1 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -18,21 +18,25 @@ CFLAGS += $(WERROR_FLAGS) -O3
LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 6
+LIBABIVER := 8
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_cpuflags.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_dev.c
# from common dir
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
@@ -40,6 +44,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
@@ -48,14 +53,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 4eafcb5a..d7ae9d68 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -18,6 +18,7 @@
#include <limits.h>
#include <sys/mman.h>
#include <sys/queue.h>
+#include <sys/stat.h>
#include <rte_compat.h>
#include <rte_common.h>
@@ -40,6 +41,7 @@
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
+#include <rte_vfio.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
@@ -64,8 +66,8 @@ static int mem_cfg_fd = -1;
static struct flock wr_lock = {
.l_type = F_WRLCK,
.l_whence = SEEK_SET,
- .l_start = offsetof(struct rte_mem_config, memseg),
- .l_len = sizeof(early_mem_config.memseg),
+ .l_start = offsetof(struct rte_mem_config, memsegs),
+ .l_len = sizeof(early_mem_config.memsegs),
};
/* Address of global and public configuration */
@@ -82,20 +84,72 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
-/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
-{
- return internal_config.user_mbuf_pool_ops_name;
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+int
+eal_create_runtime_dir(void)
+{
+ const char *directory = default_runtime_dir;
+ const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+ const char *fallback = "/tmp";
+ char tmp[PATH_MAX];
+ int ret;
+
+ if (getuid() != 0) {
+ /* try XDG path first, fall back to /tmp */
+ if (xdg_runtime_dir != NULL)
+ directory = xdg_runtime_dir;
+ else
+ directory = fallback;
+ }
+ /* create DPDK subdirectory under runtime dir */
+ ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+ if (ret < 0 || ret == sizeof(tmp)) {
+ RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+ return -1;
+ }
+
+ /* create prefix-specific subdirectory under DPDK runtime dir */
+ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+ tmp, internal_config.hugefile_prefix);
+ if (ret < 0 || ret == sizeof(runtime_dir)) {
+ RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+ return -1;
+ }
+
+ /* create the path if it doesn't exist. no "mkdir -p" here, so do it
+ * step by step.
+ */
+ ret = mkdir(tmp, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ tmp, strerror(errno));
+ return -1;
+ }
+
+ ret = mkdir(runtime_dir, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ runtime_dir, strerror(errno));
+ return -1;
+ }
+
+ return 0;
}
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+eal_get_runtime_dir(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
+ return runtime_dir;
+}
+/* Return user provided mbuf pool ops name */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
return internal_config.user_mbuf_pool_ops_name;
}
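The eal_create_runtime_dir() function added above picks /var/run when running as root, otherwise $XDG_RUNTIME_DIR with /tmp as a fallback, and then appends a dpdk/<prefix> subtree. A standalone sketch of just that path-resolution step is shown below; it is illustrative only, and "prefix" stands in for internal_config.hugefile_prefix.

/* Illustrative sketch of the runtime-directory resolution used above;
 * not part of the patch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int
resolve_runtime_dir(char *out, size_t len, const char *prefix)
{
	const char *base = "/var/run";
	const char *xdg = getenv("XDG_RUNTIME_DIR");
	int ret;

	if (getuid() != 0)
		base = (xdg != NULL) ? xdg : "/tmp";

	ret = snprintf(out, len, "%s/dpdk/%s", base, prefix);
	return (ret < 0 || (size_t)ret >= len) ? -1 : 0;
}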
@@ -222,12 +276,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -289,7 +348,7 @@ eal_get_hugepage_mem_size(void)
for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (hpi->hugedir != NULL) {
+ if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
size += hpi->hugepage_sz * hpi->num_pages[j];
}
@@ -379,7 +438,8 @@ eal_parse_args(int argc, char **argv)
switch (opt) {
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name = optarg;
+ internal_config.user_mbuf_pool_ops_name =
+ strdup(optarg);
break;
case 'h':
eal_usage(prgname);
@@ -403,6 +463,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -429,25 +497,29 @@ out:
return ret;
}
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+ int *socket_id = arg;
+
+ if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
+ return 1;
+
+ return 0;
+}
+
static void
eal_check_mem_on_local_socket(void)
{
- const struct rte_memseg *ms;
- int i, socket_id;
+ int socket_id;
socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
- ms = rte_eal_get_physmem_layout();
-
- for (i = 0; i < RTE_MAX_MEMSEG; i++)
- if (ms[i].socket_id == socket_id &&
- ms[i].len > 0)
- return;
-
- RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
- "memory on local socket!\n");
+ if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
}
+
static int
sync_func(__attribute__((unused)) void *arg)
{
@@ -531,6 +603,9 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /* FreeBSD always uses legacy memory model */
+ internal_config.legacy_mem = true;
+
if (eal_plugins_init() < 0) {
rte_eal_init_alert("Cannot init plugins\n");
rte_errno = EINVAL;
@@ -544,6 +619,24 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ rte_config_init();
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0) {
+ rte_eal_init_alert("failed to init mp channel\n");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -554,13 +647,17 @@ rte_eal_init(int argc, char **argv)
/* autodetect the iova mapping mode (default is iova_pa) */
rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
- if (internal_config.no_hugetlbfs == 0 &&
- internal_config.process_type != RTE_PROC_SECONDARY &&
- eal_hugepage_info_init() < 0) {
- rte_eal_init_alert("Cannot get hugepage information.");
- rte_errno = EACCES;
- rte_atomic32_clear(&run_once);
- return -1;
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
}
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
@@ -583,14 +680,14 @@ rte_eal_init(int argc, char **argv)
rte_srand(rte_rdtsc());
- rte_config_init();
-
- if (rte_mp_channel_init() < 0) {
- rte_eal_init_alert("failed to init mp channel\n");
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- rte_errno = EFAULT;
- return -1;
- }
+ /* in secondary processes, memory init may allocate additional fbarrays
+ * not present in primary processes, so to avoid any potential issues,
+ * initialize memzones first.
+ */
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone\n");
+ rte_errno = ENODEV;
+ return -1;
}
if (rte_eal_memory_init() < 0) {
@@ -599,8 +696,8 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_memzone_init() < 0) {
- rte_eal_init_alert("Cannot init memzone\n");
+ if (rte_eal_malloc_heap_init() < 0) {
+ rte_eal_init_alert("Cannot init malloc heap\n");
rte_errno = ENODEV;
return -1;
}
@@ -617,11 +714,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
if (rte_eal_timer_init() < 0) {
rte_eal_init_alert("Cannot init HPET or TSC timers\n");
rte_errno = ENOTSUP;
@@ -632,7 +724,7 @@ rte_eal_init(int argc, char **argv)
eal_thread_init_master(rte_config.master_lcore);
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
rte_config.master_lcore, thread_id, cpuset,
@@ -658,7 +750,7 @@ rte_eal_init(int argc, char **argv)
rte_panic("Cannot create thread\n");
/* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ snprintf(thread_name, sizeof(thread_name),
"lcore-slave-%d", i);
rte_thread_setname(lcore_config[i].thread_id, thread_name);
}
@@ -735,18 +827,6 @@ rte_eal_vfio_intr_mode(void)
return RTE_INTR_MODE_NONE;
}
-/* dummy forward declaration. */
-struct vfio_device_info;
-
-/* dummy prototypes. */
-int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
- int *vfio_dev_fd, struct vfio_device_info *device_info);
-int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
-int rte_vfio_enable(const char *modname);
-int rte_vfio_is_enabled(const char *modname);
-int rte_vfio_noiommu_is_enabled(void);
-int rte_vfio_clear_group(int vfio_group_fd);
-
int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *vfio_dev_fd,
@@ -781,3 +861,81 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
return 0;
}
+
+int
+rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_create(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
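One change earlier in this file moves rte_config_init(), interrupt init and the multi-process channel ahead of the bus scan; the process-type auto-detection itself still relies on an fcntl() write lock over part of the shared memory config file. Stripped of EAL specifics, that lock-based primary/secondary detection works roughly as in the hedged sketch below (file name and lock range are illustrative, not from the patch).

/* Sketch of the F_SETLK-based primary/secondary detection used by
 * eal_proc_type_detect(): whoever holds the write lock is the primary.
 * Illustrative only; the path and lock range are made up.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0, /* 0 means "lock the whole file" */
	};
	int fd = open("/tmp/example_config", O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;

	if (fcntl(fd, F_SETLK, &wr_lock) < 0)
		printf("lock already held: secondary process\n");
	else
		printf("lock acquired: primary process\n");

	/* keep fd open for as long as the role must be held */
	return 0;
}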
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c9..51ea4b8c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include <errno.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
int
rte_eal_alarm_init(void)
{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+ /* on FreeBSD, timers don't use fd's, and their identifiers are stored
+ * in a separate namespace from fd's, so any value would do. however,
+ * the EAL interrupt handler expects fd's to be unique, so use an
+ * actual fd to guarantee a unique timer identifier.
+ */
+ intr_handle.fd = open("/dev/zero", O_RDONLY);
+
+ return 0;
+}
+
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
return 0;
}
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
int
-rte_eal_alarm_set(uint64_t us __rte_unused,
- rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+eal_alarm_get_timeout_ns(uint64_t *val)
{
- return -ENOTSUP;
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
+ return 0;
+}
+
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
}
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ ap = LIST_FIRST(&alarm_list);
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return;
+
+ while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+
+ ap = LIST_FIRST(&alarm_list);
+ }
+
+ /* timer has been deleted from the kqueue, so recreate it if needed */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+
int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+ struct alarm_entry *ap, *new_alarm;
+ struct timespec now;
+ uint64_t ns;
+ int ret = 0;
+
+ /* check parameters, also ensure us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ ns = us * NS_PER_US;
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ /* re-register first callback just in case */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If called from another context, mark that the
+ * alarm is executing so the loop can spin until it
+ * finishes. Otherwise we are trying to cancel
+ * ourselves - mark it with EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return count;
}
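The alarm API that used to return -ENOTSUP on FreeBSD is now functional, backed by the sorted alarm list and the timer interrupt handle above. A minimal usage sketch follows; the one-second period and the callback body are illustrative, but rte_eal_alarm_set()/rte_eal_alarm_cancel() and the (void *)-1 "match any argument" convention are what the code above implements.

/* Minimal, illustrative use of the alarm API: alarms are one-shot, so a
 * periodic callback simply re-arms itself until it is cancelled.
 */
#include <stdio.h>
#include <rte_alarm.h>

#define PERIOD_US (1000 * 1000) /* 1 second, illustrative */

static void
periodic_cb(void *arg)
{
	printf("alarm fired (arg=%p)\n", arg);
	rte_eal_alarm_set(PERIOD_US, periodic_cb, arg); /* re-arm */
}

/* start:  rte_eal_alarm_set(PERIOD_US, periodic_cb, NULL);
 * cancel: rte_eal_alarm_cancel(periodic_cb, (void *)-1);
 */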
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 00000000..65c71151
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between the interrupt
+ * and alarm threads, because on FreeBSD the timer period is set up inside the
+ * interrupt API rather than inside the alarm API as on Linux.
+ */
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/lib/librte_eal/bsdapp/eal/eal_cpuflags.c b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c
new file mode 100644
index 00000000..69b161ea
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+unsigned long
+rte_cpu_getauxval(unsigned long type __rte_unused)
+{
+ /* not implemented */
+ return 0;
+}
+
+int
+rte_cpu_strcmp_auxval(unsigned long type __rte_unused,
+ const char *str __rte_unused)
+{
+ /* not implemented */
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c
new file mode 100644
index 00000000..1c6c51bd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_dev.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_log.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+
+int __rte_experimental
+rte_dev_event_monitor_start(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
+
+int __rte_experimental
+rte_dev_event_monitor_stop(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index be2dbf0e..1e8f5df2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -19,10 +19,10 @@
* Used in this file to store the hugepage file map on disk
*/
static void *
-create_shared_memory(const char *filename, const size_t mem_size)
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
{
void *retval;
- int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ int fd = open(filename, flags, 0666);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
@@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size)
return retval;
}
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
/*
* No hugepage support on freebsd, but we dummy it, using contigmem driver
*/
@@ -46,13 +58,16 @@ eal_hugepage_info_init(void)
/* re-use the linux "internal config" structure for our memory data */
struct hugepage_info *hpi = &internal_config.hugepage_info[0];
struct hugepage_info *tmp_hpi;
+ unsigned int i;
+
+ internal_config.num_hugepage_sizes = 1;
sysctl_size = sizeof(num_buffers);
error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
&sysctl_size, NULL, 0);
if (error != 0) {
- RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers");
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers\n");
return -1;
}
@@ -61,7 +76,7 @@ eal_hugepage_info_init(void)
&sysctl_size, NULL, 0);
if (error != 0) {
- RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size");
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size\n");
return -1;
}
@@ -81,25 +96,61 @@ eal_hugepage_info_init(void)
RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
num_buffers, (int)(buffer_size>>10));
- internal_config.num_hugepage_sizes = 1;
- hpi->hugedir = CONTIGMEM_DEV;
+ strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir));
hpi->hugepage_sz = buffer_size;
hpi->num_pages[0] = num_buffers;
hpi->lock_descriptor = fd;
+ /* in no-shared-files mode, do not create a shared memory config */
+ if (internal_config.no_shconf)
+ return 0;
+
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
- sizeof(struct hugepage_info));
+ sizeof(internal_config.hugepage_info));
if (tmp_hpi == NULL ) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
return -1;
}
- memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info));
+ memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+ /* we've copied file descriptors along with everything else, but they
+ * will be invalid in secondary process, so overwrite them
+ */
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ struct hugepage_info *tmp = &tmp_hpi[i];
+ tmp->lock_descriptor = -1;
+ }
- if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) {
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
return -1;
}
return 0;
}
+
+/* copy stuff from shared info into internal config */
+int
+eal_hugepage_info_read(void)
+{
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+
+ internal_config.num_hugepage_sizes = 1;
+
+ tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+ return -1;
+ }
+
+ memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
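The hugepage information on FreeBSD comes from the hw.contigmem sysctls exported by the kernel module; eal_hugepage_info_init() reads them with sysctlbyname() much as in the small standalone sketch below (FreeBSD only, illustrative and not part of the patch).

/* Standalone FreeBSD sketch: query the contigmem sysctls read above. */
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int num_buffers;
	int64_t buffer_size;
	size_t len;

	len = sizeof(num_buffers);
	if (sysctlbyname("hw.contigmem.num_buffers", &num_buffers, &len,
			NULL, 0) != 0)
		return 1;

	len = sizeof(buffer_size);
	if (sysctlbyname("hw.contigmem.buffer_size", &buffer_size, &len,
			NULL, 0) != 0)
		return 1;

	printf("%d buffers of %jd bytes each\n", num_buffers,
			(intmax_t)buffer_size);
	return 0;
}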
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 290d53ab..2feee2d5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -1,51 +1,479 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <string.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
#include <rte_common.h>
#include <rte_interrupts.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define MAX_INTR_EVENTS 16
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+static volatile int kq = -1;
+
+static int
+intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
+{
+ /* alarm callbacks are special case */
+ if (ih->type == RTE_INTR_HANDLE_ALARM) {
+ uint64_t timeout_ns;
+
+ /* get soonest alarm timeout */
+ if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+ return -1;
+
+ ke->filter = EVFILT_TIMER;
+ /* timers are one shot */
+ ke->flags |= EV_ONESHOT;
+ ke->fflags = NOTE_NSECONDS;
+ ke->data = timeout_ns;
+ } else {
+ ke->filter = EVFILT_READ;
+ }
+ ke->ident = ih->fd;
+
+ return 0;
+}
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ struct rte_intr_callback *callback = NULL;
+ struct rte_intr_source *src = NULL;
+ int ret, add_event = 0;
- return -ENOTSUP;
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq);
+ return -ENODEV;
+ }
+
+ /* allocate a new interrupt callback entity */
+ callback = calloc(1, sizeof(*callback));
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd) {
+ /* we had no interrupts for this */
+ if (TAILQ_EMPTY(&src->callbacks))
+ add_event = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ ret = 0;
+ break;
+ }
+ }
+
+ /* no existing callbacks for this - add new source */
+ if (src == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ add_event = 1;
+ ret = 0;
+ }
+ }
+
+ /* add events to the queue. timer events are special as we need to
+ * re-set the timer.
+ */
+ if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
+ struct kevent ke;
+
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_ADD; /* mark for addition to the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
+ /**
+ * add the intr file descriptor into wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ /* currently, nic_uio does not support interrupts, so
+ * this error will always be triggered and output to the
+ * user. so, don't output it unless the debug log level is set.
+ */
+ if (errno == ENODEV)
+ RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n",
+ src->intr_handle.fd);
+ else
+ RTE_LOG(ERR, EAL, "Error adding fd %d "
+ "kevent, %s\n",
+ src->intr_handle.fd,
+ strerror(errno));
+ ret = -errno;
+ goto fail;
+ }
+ }
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+fail:
+ /* clean up */
+ if (src != NULL) {
+ TAILQ_REMOVE(&(src->callbacks), callback, next);
+ if (TAILQ_EMPTY(&(src->callbacks))) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+ free(callback);
+ rte_spinlock_unlock(&intr_lock);
+ return ret;
}
int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb_fn, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
- return -ENOTSUP;
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if an interrupt source for this fd already exists */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ struct kevent ke;
+
+ ret = 0;
+
+ /* remove it from the kqueue */
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_DELETE; /* mark for deletion from the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ /* removing a non-existent event is an expected condition
+ * in some circumstances (e.g. oneshot events).
+ */
+ }
+
+ /* walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+out:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
}
int
-rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
}
int
-rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+eal_intr_process_interrupts(struct kevent *events, int nfds)
+{
+ struct rte_intr_callback active_cb;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback *cb;
+ struct rte_intr_source *src;
+ bool call = false;
+ int n, bytes_read;
+
+ for (n = 0; n < nfds; n++) {
+ int event_fd = events[n].ident;
+
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == event_fd)
+ break;
+ if (src == NULL) {
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read for different handle types */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_VDEV:
+ case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (bytes_read > 0) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(event_fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ event_fd,
+ strerror(errno));
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", event_fd);
+ else
+ call = true;
+ }
+
+ /* grab a lock, again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (call) {
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(active_cb.cb_arg);
+
+ /* get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+
+ /* we are done with this interrupt source, release it. */
+ src->active = 0;
+ rte_spinlock_unlock(&intr_lock);
+ }
+}
+
+static void *
+eal_intr_thread_main(void *arg __rte_unused)
+{
+ struct kevent events[MAX_INTR_EVENTS];
+ int nfds;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* do not change anything, just wait */
+ nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL);
+
+ /* kevent fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "kevent returns with fail\n");
+ break;
+ }
+ /* kevent timeout, will never happen here */
+ else if (nfds == 0)
+ continue;
+
+ /* kevent has at least one fd ready to read */
+ eal_intr_process_interrupts(events, nfds);
+ }
+ close(kq);
+ kq = -1;
+ return NULL;
}
int
rte_eal_intr_init(void)
{
- return 0;
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ kq = kqueue();
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n");
+ return -1;
+ }
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ rte_errno = -ret;
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ }
+
+ return ret;
}
int
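The interrupt thread above multiplexes everything through a single kqueue: fd-backed sources use EVFILT_READ, while alarms are converted by intr_source_to_kevent() into one-shot EVFILT_TIMER events with a NOTE_NSECONDS timeout. Outside of the EAL plumbing, that kernel-level pattern reduces to roughly the following sketch (a single hard-coded timer, minimal error handling, not part of the patch).

/* Bare FreeBSD kqueue sketch of the one-shot timer pattern used above. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <unistd.h>

int
main(void)
{
	struct kevent ke, ev;
	int kq = kqueue();

	if (kq < 0)
		return 1;

	/* arm a one-shot 100 ms timer, expressed in nanoseconds */
	EV_SET(&ke, 1 /* ident */, EVFILT_TIMER, EV_ADD | EV_ONESHOT,
			NOTE_NSECONDS, 100 * 1000 * 1000, NULL);
	if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0)
		return 1;

	/* block until the timer fires; one-shot events delete themselves */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		printf("timer %lu fired\n", (unsigned long)ev.ident);

	close(kq);
	return 0;
}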
diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
new file mode 100644
index 00000000..f7f07abd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+
+#include "eal_memalloc.h"
+
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int __rte_unused n_segs, size_t __rte_unused page_sz,
+ int __rte_unused socket, bool __rte_unused exact)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return NULL;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int n_segs __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_sync_with_primary(void)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_init(void)
+{
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index bdfb8828..16d2bc7c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -6,10 +6,13 @@
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
+#include <errno.h>
+#include <string.h>
#include <fcntl.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
@@ -41,129 +44,253 @@ rte_eal_hugepage_init(void)
struct rte_mem_config *mcfg;
uint64_t total_mem = 0;
void *addr;
- unsigned i, j, seg_idx = 0;
+ unsigned int i, j, seg_idx = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
/* for debug purposes, hugetlbfs can be disabled */
if (internal_config.no_hugetlbfs) {
- addr = malloc(internal_config.memory);
- mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr;
- mcfg->memseg[0].addr = addr;
- mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
- mcfg->memseg[0].len = internal_config.memory;
- mcfg->memseg[0].socket_id = 0;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ struct rte_memseg *ms;
+ uint64_t page_sz;
+ int n_segs, cur_seg;
+
+ /* create a memseg list */
+ msl = &mcfg->memsegs[0];
+
+ page_sz = RTE_PGSIZE_4K;
+ n_segs = internal_config.memory / page_sz;
+
+ if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ addr = mmap(NULL, internal_config.memory,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = 0;
+
+ /* populate memsegs. each memseg is 1 page long */
+ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
+ arr = &msl->memseg_arr;
+
+ ms = rte_fbarray_get(arr, cur_seg);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ms->iova = (uintptr_t)addr;
+ else
+ ms->iova = RTE_BAD_IOVA;
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->socket_id = 0;
+
+ rte_fbarray_set_used(arr, cur_seg);
+
+ addr = RTE_PTR_ADD(addr, page_sz);
+ }
return 0;
}
/* map all hugepages and sort them */
for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
struct hugepage_info *hpi;
+ rte_iova_t prev_end = 0;
+ int prev_ms_idx = -1;
+ uint64_t page_sz, mem_needed;
+ unsigned int n_pages, max_pages;
hpi = &internal_config.hugepage_info[i];
- for (j = 0; j < hpi->num_pages[0]; j++) {
+ page_sz = hpi->hugepage_sz;
+ max_pages = hpi->num_pages[0];
+ mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
+ page_sz);
+
+ n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
+
+ for (j = 0; j < n_pages; j++) {
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
struct rte_memseg *seg;
+ int msl_idx, ms_idx;
rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
+ bool is_adjacent;
- addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
- MAP_SHARED, hpi->lock_descriptor,
- j * EAL_PAGE_SIZE);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
- j, hpi->hugedir);
- return -1;
- }
-
- snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
- ".physaddr.%d", j);
- error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
- NULL, 0);
+ /* first, check if this segment is IOVA-adjacent to
+ * the previous one.
+ */
+ snprintf(physaddr_str, sizeof(physaddr_str),
+ "hw.contigmem.physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr,
+ &sysctl_size, NULL, 0);
if (error < 0) {
RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
"from %s\n", j, hpi->hugedir);
return -1;
}
- seg = &mcfg->memseg[seg_idx++];
+ is_adjacent = prev_end != 0 && physaddr == prev_end;
+ prev_end = physaddr + hpi->hugepage_sz;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
+ msl_idx++) {
+ bool empty, need_hole;
+ msl = &mcfg->memsegs[msl_idx];
+ arr = &msl->memseg_arr;
+
+ if (msl->page_sz != page_sz)
+ continue;
+
+ empty = arr->count == 0;
+
+ /* we need a hole if this isn't an empty memseg
+ * list, and if previous segment was not
+ * adjacent to current one.
+ */
+ need_hole = !empty && !is_adjacent;
+
+ /* we need 1, plus hole if not adjacent */
+ ms_idx = rte_fbarray_find_next_n_free(arr,
+ 0, 1 + (need_hole ? 1 : 0));
+
+ /* memseg list is full? */
+ if (ms_idx < 0)
+ continue;
+
+ if (need_hole && prev_ms_idx == ms_idx - 1)
+ ms_idx++;
+ prev_ms_idx = ms_idx;
+
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
+ RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+ seg = rte_fbarray_get(arr, ms_idx);
+
+ addr = RTE_PTR_ADD(msl->base_va,
+ (size_t)msl->page_sz * ms_idx);
+
+ /* address is already mapped in memseg list, so using
+ * MAP_FIXED here is safe.
+ */
+ addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_FIXED,
+ hpi->lock_descriptor,
+ j * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ j, hpi->hugedir);
+ return -1;
+ }
+
seg->addr = addr;
seg->iova = physaddr;
- seg->hugepage_sz = hpi->hugepage_sz;
- seg->len = hpi->hugepage_sz;
+ seg->hugepage_sz = page_sz;
+ seg->len = page_sz;
seg->nchannel = mcfg->nchannel;
seg->nrank = mcfg->nrank;
seg->socket_id = 0;
+ rte_fbarray_set_used(arr, ms_idx);
+
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu\n",
- seg_idx, addr, physaddr, hpi->hugepage_sz);
- if (total_mem >= internal_config.memory ||
- seg_idx >= RTE_MAX_MEMSEG)
- break;
+ seg_idx++, addr, physaddr, page_sz);
+
+ total_mem += seg->len;
}
+ if (total_mem >= internal_config.memory)
+ break;
+ }
+ if (total_mem < internal_config.memory) {
+ RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
+ "requested: %" PRIu64 "M "
+ "available: %" PRIu64 "M\n",
+ internal_config.memory >> 20, total_mem >> 20);
+ return -1;
}
return 0;
}
+struct attach_walk_args {
+ int fd_hugepage;
+ int seg_idx;
+};
+static int
+attach_segment(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct attach_walk_args *wa = arg;
+ void *addr;
+
+ addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
+ wa->seg_idx * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED || addr != ms->addr)
+ return -1;
+ wa->seg_idx++;
+
+ return 0;
+}
+
int
rte_eal_hugepage_attach(void)
{
const struct hugepage_info *hpi;
- int fd_hugepage_info, fd_hugepage = -1;
- unsigned i = 0;
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int fd_hugepage = -1;
+ unsigned int i;
- /* Obtain a file descriptor for hugepage_info */
- fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
- if (fd_hugepage_info < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
- return -1;
- }
+ hpi = &internal_config.hugepage_info[0];
- /* Map the shared hugepage_info into the process address spaces */
- hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE,
- fd_hugepage_info, 0);
- if (hpi == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
- goto error;
- }
-
- /* Obtain a file descriptor for contiguous memory */
- fd_hugepage = open(hpi->hugedir, O_RDWR);
- if (fd_hugepage < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir);
- goto error;
- }
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ const struct hugepage_info *cur_hpi = &hpi[i];
+ struct attach_walk_args wa;
- /* Map the contiguous memory into each memory segment */
- for (i = 0; i < hpi->num_pages[0]; i++) {
+ memset(&wa, 0, sizeof(wa));
- void *addr;
- struct rte_memseg *seg = &mcfg->memseg[i];
+ /* Obtain a file descriptor for contiguous memory */
+ fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ cur_hpi->hugedir);
+ goto error;
+ }
+ wa.fd_hugepage = fd_hugepage;
+ wa.seg_idx = 0;
- addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
- MAP_SHARED|MAP_FIXED, fd_hugepage,
- i * EAL_PAGE_SIZE);
- if (addr == MAP_FAILED || addr != seg->addr) {
+ /* Map the contiguous memory into each memory segment */
+ if (rte_memseg_walk(attach_segment, &wa) < 0) {
RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
- i, hpi->hugedir);
+ wa.seg_idx, cur_hpi->hugedir);
goto error;
}
+ close(fd_hugepage);
+ fd_hugepage = -1;
}
/* hugepage_info is no longer required */
- munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
- close(fd_hugepage_info);
- close(fd_hugepage);
return 0;
error:
- if (fd_hugepage_info >= 0)
- close(fd_hugepage_info);
if (fd_hugepage >= 0)
close(fd_hugepage);
return -1;
@@ -174,3 +301,217 @@ rte_eal_using_phys_addrs(void)
{
return 0;
}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+
+static int
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* FreeBSD has an issue where core dump will dump the entire memory
+ * contents, including anonymous zero-page memory. Therefore, while we
+ * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
+ * also be further limiting total memory amount to whatever memory is
+ * available to us through contigmem driver (plus spacing blocks).
+ *
+ * so, at each stage, we will be checking how much memory we are
+ * preallocating, and adjusting all the values accordingly.
+ */
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ uint64_t avail_mem;
+ int type_msl_idx, max_segs, avail_segs, total_segs = 0;
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* no NUMA support on FreeBSD */
+
+ /* check if we've already exceeded total memory amount */
+ if (total_mem >= max_mem)
+ break;
+
+ /* first, calculate theoretical limits according to config */
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ /* now, limit all of that to whatever will actually be
+ * available to us, because without dynamic allocation support,
+ * all of that extra memory will be sitting there being useless
+ * and slowing down core dumps in case of a crash.
+ *
+ * we need (N*2)-1 segments because we cannot guarantee that
+ * each segment will be IOVA-contiguous with the previous one,
+		 * so we will allocate more and put spaces in between segments
+ * that are non-contiguous.
+ */
+ avail_segs = (hpi->num_pages[0] * 2) - 1;
+ avail_mem = avail_segs * hugepage_sz;
+
+ max_type_mem = RTE_MIN(avail_mem, max_type_mem);
+ max_segs = RTE_MIN(avail_segs, max_segs);
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ 0, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ return 0;
+}
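A quick worked example of the (N*2)-1 budgeting above (the figures are hypothetical): with 4 contigmem buffers of 2 MB each, avail_segs = 4 * 2 - 1 = 7 and avail_mem = 7 * 2 MB = 14 MB of address space is set aside, enough for the worst case of buffer, hole, buffer, hole, buffer, hole, buffer when no two buffers happen to be IOVA-contiguous.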
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ memseg_primary_init() :
+ memseg_secondary_init();
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index d602daf8..309b5872 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
if (eal_thread_set_affinity() < 0)
rte_panic("cannot set affinity\n");
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build
index e83fc919..3945b529 100644
--- a/lib/librte_eal/bsdapp/eal/meson.build
+++ b/lib/librte_eal/bsdapp/eal/meson.build
@@ -4,12 +4,17 @@
env_objs = []
env_headers = []
env_sources = files('eal_alarm.c',
+ 'eal_cpuflags.c',
'eal_debug.c',
'eal_hugepage_info.c',
'eal_interrupts.c',
'eal_lcore.c',
+ 'eal_memalloc.c',
'eal_thread.c',
'eal_timer.c',
'eal.c',
'eal_memory.c',
+ 'eal_dev.c'
)
+
+deps += ['kvargs']
diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
deleted file mode 100644
index b6f92d55..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-#
-
-KMOD= nic_uio
-SRCS= nic_uio.c device_if.h bus_if.h pci_if.h
-
-.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile
deleted file mode 100644
index 376ef3a3..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = nic_uio
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR)
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -Winline -Wall -Werror
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := nic_uio.c
-
-include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/nic_uio/meson.build b/lib/librte_eal/bsdapp/nic_uio/meson.build
deleted file mode 100644
index 4bdaf969..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/meson.build
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
-
-sources = files('nic_uio.c')
diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
deleted file mode 100644
index 401b487e..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h> /* defines used in kernel.h */
-#include <sys/module.h>
-#include <sys/kernel.h> /* types used in module initialization */
-#include <sys/conf.h> /* cdevsw struct */
-#include <sys/bus.h> /* structs, prototypes for pci bus stuff and DEVMETHOD */
-#include <sys/rman.h>
-#include <sys/systm.h>
-#include <sys/rwlock.h>
-#include <sys/proc.h>
-
-#include <machine/bus.h>
-#include <dev/pci/pcivar.h> /* For pci_get macros! */
-#include <dev/pci/pcireg.h> /* The softc holds our per-instance data. */
-#include <vm/vm.h>
-#include <vm/uma.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-
-
-#define MAX_BARS (PCIR_MAX_BAR_0 + 1)
-
-#define MAX_DETACHED_DEVICES 128
-static device_t detached_devices[MAX_DETACHED_DEVICES] = {};
-static int num_detached = 0;
-
-struct nic_uio_softc {
- device_t dev_t;
- struct cdev *my_cdev;
- int bar_id[MAX_BARS];
- struct resource *bar_res[MAX_BARS];
- u_long bar_start[MAX_BARS];
- u_long bar_size[MAX_BARS];
-};
-
-/* Function prototypes */
-static d_open_t nic_uio_open;
-static d_close_t nic_uio_close;
-static d_mmap_t nic_uio_mmap;
-static d_mmap_single_t nic_uio_mmap_single;
-static int nic_uio_probe(device_t dev);
-static int nic_uio_attach(device_t dev);
-static int nic_uio_detach(device_t dev);
-static int nic_uio_shutdown(void);
-static int nic_uio_modevent(module_t mod, int type, void *arg);
-
-static struct cdevsw uio_cdevsw = {
- .d_name = "nic_uio",
- .d_version = D_VERSION,
- .d_open = nic_uio_open,
- .d_close = nic_uio_close,
- .d_mmap = nic_uio_mmap,
- .d_mmap_single = nic_uio_mmap_single,
-};
-
-static device_method_t nic_uio_methods[] = {
- DEVMETHOD(device_probe, nic_uio_probe),
- DEVMETHOD(device_attach, nic_uio_attach),
- DEVMETHOD(device_detach, nic_uio_detach),
- DEVMETHOD_END
-};
-
-struct device {
- int vend;
- int dev;
-};
-
-struct pci_bdf {
- uint32_t bus;
- uint32_t devid;
- uint32_t function;
-};
-
-static devclass_t nic_uio_devclass;
-
-DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc));
-DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0);
-
-static int
-nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
- int prot, vm_memattr_t *memattr)
-{
- *paddr = offset;
- return 0;
-}
-
-static int
-nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
- struct vm_object **obj, int nprot)
-{
- /*
- * The BAR index is encoded in the offset. Divide the offset by
- * PAGE_SIZE to get the index of the bar requested by the user
- * app.
- */
- unsigned bar = *offset/PAGE_SIZE;
- struct nic_uio_softc *sc = cdev->si_drv1;
-
- if (bar >= MAX_BARS)
- return EINVAL;
-
- if (sc->bar_res[bar] == NULL) {
- sc->bar_id[bar] = PCIR_BAR(bar);
-
- if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4)))
- sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT,
- &sc->bar_id[bar], RF_ACTIVE);
- else
- sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY,
- &sc->bar_id[bar], RF_ACTIVE);
- }
- if (sc->bar_res[bar] == NULL)
- return ENXIO;
-
- sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]);
- sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]);
-
- device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar,
- sc->bar_start[bar], sc->bar_size[bar]);
-
- *offset = sc->bar_start[bar];
- *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
- curthread->td_ucred);
- return 0;
-}
-
-
-int
-nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- return 0;
-}
-
-int
-nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
-{
- return 0;
-}
-
-static int
-nic_uio_probe (device_t dev)
-{
- int i;
- unsigned int bus = pci_get_bus(dev);
- unsigned int device = pci_get_slot(dev);
- unsigned int function = pci_get_function(dev);
-
- char bdf_str[256];
- char *token, *remaining;
-
- /* First check if we found this on load */
- for (i = 0; i < num_detached; i++)
- if (bus == pci_get_bus(detached_devices[i]) &&
- device == pci_get_slot(detached_devices[i]) &&
- function == pci_get_function(detached_devices[i])) {
- device_set_desc(dev, "DPDK PCI Device");
- return BUS_PROBE_SPECIFIC;
- }
-
- /* otherwise check if it's a new device and if it matches the BDF */
- memset(bdf_str, 0, sizeof(bdf_str));
- TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
- remaining = bdf_str;
- while (1) {
- if (remaining == NULL || remaining[0] == '\0')
- break;
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- bus = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- device = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- function = strtol(token, NULL, 10);
-
- if (bus == pci_get_bus(dev) &&
- device == pci_get_slot(dev) &&
- function == pci_get_function(dev)) {
-
- if (num_detached < MAX_DETACHED_DEVICES) {
- printf("%s: probed dev=%p\n",
- __func__, dev);
- detached_devices[num_detached++] = dev;
- device_set_desc(dev, "DPDK PCI Device");
- return BUS_PROBE_SPECIFIC;
- } else {
- printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
- __func__, MAX_DETACHED_DEVICES,
- dev);
- break;
- }
- }
- }
-
- return ENXIO;
-}
-
-static int
-nic_uio_attach(device_t dev)
-{
- int i;
- struct nic_uio_softc *sc;
-
- sc = device_get_softc(dev);
- sc->dev_t = dev;
- sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev),
- UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u",
- pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
- if (sc->my_cdev == NULL)
- return ENXIO;
- sc->my_cdev->si_drv1 = sc;
-
- for (i = 0; i < MAX_BARS; i++)
- sc->bar_res[i] = NULL;
-
- pci_enable_busmaster(dev);
-
- return 0;
-}
-
-static int
-nic_uio_detach(device_t dev)
-{
- int i;
- struct nic_uio_softc *sc;
- sc = device_get_softc(dev);
-
- for (i = 0; i < MAX_BARS; i++)
- if (sc->bar_res[i] != NULL) {
-
- if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4)))
- bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i],
- sc->bar_res[i]);
- else
- bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i],
- sc->bar_res[i]);
- }
-
- if (sc->my_cdev != NULL)
- destroy_dev(sc->my_cdev);
- return 0;
-}
-
-static void
-nic_uio_load(void)
-{
- uint32_t bus, device, function;
- device_t dev;
- char bdf_str[256];
- char *token, *remaining;
-
- memset(bdf_str, 0, sizeof(bdf_str));
- TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
- remaining = bdf_str;
- printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str);
- /*
- * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
- * But the code below does not try differentiate between : and ,
- * and just blindly uses 3 tokens at a time to construct a
- * bus/device/function tuple.
- *
- * There is no checking on strtol() return values, but this should
- * be OK. Worst case is it cannot convert and returns 0. This
- * could give us a different BDF than intended, but as long as the
- * PCI device/vendor ID does not match it will not matter.
- */
- while (1) {
- if (remaining == NULL || remaining[0] == '\0')
- break;
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- bus = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- device = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- function = strtol(token, NULL, 10);
-
- dev = pci_find_bsf(bus, device, function);
- if (dev == NULL)
- continue;
-
- if (num_detached < MAX_DETACHED_DEVICES) {
- printf("nic_uio_load: detaching and storing dev=%p\n",
- dev);
- detached_devices[num_detached++] = dev;
- } else {
- printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
- MAX_DETACHED_DEVICES, dev);
- }
- device_detach(dev);
- }
-}
-
-static void
-nic_uio_unload(void)
-{
- int i;
- printf("nic_uio_unload: entered...\n");
-
- for (i = 0; i < num_detached; i++) {
- printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n",
- detached_devices[i]);
- device_probe_and_attach(detached_devices[i]);
- printf("nic_uio_unload: done.\n");
- }
-
- printf("nic_uio_unload: leaving...\n");
-}
-
-static int
-nic_uio_shutdown(void)
-{
- return 0;
-}
-
-static int
-nic_uio_modevent(module_t mod, int type, void *arg)
-{
-
- switch (type) {
- case MOD_LOAD:
- nic_uio_load();
- break;
- case MOD_UNLOAD:
- nic_uio_unload();
- break;
- case MOD_SHUTDOWN:
- nic_uio_shutdown();
- break;
- default:
- break;
- }
-
- return 0;
-}
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index ea824a3a..cca68826 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h
+INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 88f1cbe3..caf3dc83 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -1,34 +1,6 @@
-/*
- * BSD LICENSE
- *
- * Copyright (C) Cavium, Inc. 2015.
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Cavium, Inc nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) Cavium, Inc. 2015.
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#include "rte_cpuflags.h"
@@ -133,22 +105,10 @@ const struct feature_entry rte_cpu_feature_table[] = {
static void
rte_cpu_get_features(hwcap_registers_t out)
{
- int auxv_fd;
- _Elfx_auxv_t auxv;
-
- auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd != -1);
- while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
- if (auxv.a_type == AT_HWCAP) {
- out[REG_HWCAP] = auxv.a_un.a_val;
- } else if (auxv.a_type == AT_HWCAP2) {
- out[REG_HWCAP2] = auxv.a_un.a_val;
- } else if (auxv.a_type == AT_PLATFORM) {
- if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR))
- out[REG_PLATFORM] = 0x0001;
- }
- }
- close(auxv_fd);
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+ if (!rte_cpu_strcmp_auxval(AT_PLATFORM, PLATFORM_STR))
+ out[REG_PLATFORM] = 0x0001;
}
/*
diff --git a/lib/librte_eal/common/arch/arm/rte_hypervisor.c b/lib/librte_eal/common/arch/arm/rte_hypervisor.c
index 3792fe2c..08a1c97d 100644
--- a/lib/librte_eal/common/arch/arm/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/arm/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
index 970a61c5..e7a82452 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -104,19 +104,8 @@ const struct feature_entry rte_cpu_feature_table[] = {
static void
rte_cpu_get_features(hwcap_registers_t out)
{
- int auxv_fd;
- Elf64_auxv_t auxv;
-
- auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd != -1);
- while (read(auxv_fd, &auxv,
- sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
- if (auxv.a_type == AT_HWCAP)
- out[REG_HWCAP] = auxv.a_un.a_val;
- else if (auxv.a_type == AT_HWCAP2)
- out[REG_HWCAP2] = auxv.a_un.a_val;
- }
- close(auxv_fd);
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
}
/*
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
index 3792fe2c..08a1c97d 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/arch/x86/rte_hypervisor.c b/lib/librte_eal/common/arch/x86/rte_hypervisor.c
index edf07be1..c38cfc09 100644
--- a/lib/librte_eal/common/arch/x86/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/x86/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 3e022d51..0943851c 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -36,6 +36,7 @@
#include <rte_bus.h>
#include <rte_debug.h>
+#include <rte_string_fns.h>
#include "eal_private.h"
@@ -212,7 +213,7 @@ rte_bus_find_by_device_name(const char *str)
char name[RTE_DEV_NAME_MAX_LEN];
char *c;
- snprintf(name, sizeof(name), "%s", str);
+ strlcpy(name, str, sizeof(name));
c = strchr(name, ',');
if (c != NULL)
c[0] = '\0';
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 00000000..404a9065
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+__rte_experimental void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+__rte_experimental void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
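A minimal sketch of how a subsystem could hook into this class list. It assumes only what the code above already uses from struct rte_class (the name and dev_iterate members and the TAILQ linkage); the "example" class and the plain GCC constructor are illustrative, and a real consumer would register a class together with a working dev_iterate callback.

    #include <stddef.h>
    #include <rte_class.h>

    /* hypothetical class with no iteration support, for illustration only */
    static struct rte_class example_class = {
            .name = "example",
            .dev_iterate = NULL,
    };

    /* run at load time, before any device strings are parsed */
    static void __attribute__((constructor))
    example_class_init(void)
    {
            rte_class_register(&example_class);
    }

    /* later, the class can be looked up by name */
    static int
    example_class_is_registered(void)
    {
            return rte_class_find_by_name("example") != NULL;
    }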
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index cd071442..678dbcac 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -10,24 +10,62 @@
#include <rte_compat.h>
#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
#include "eal_private.h"
-static int cmp_detached_dev_name(const struct rte_device *dev,
- const void *_name)
-{
- const char *name = _name;
+/**
+ * The device event callback description.
+ *
+ * It contains the callback address to be registered by the user application,
+ * the pointer to the callback parameters, and the device name.
+ */
+struct dev_event_callback {
+ TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
+ rte_dev_event_cb_fn cb_fn; /**< Callback address */
+ void *cb_arg; /**< Callback parameter */
+	char *dev_name;	 /**< Callback device name; NULL means all devices */
+ uint32_t active; /**< Callback is executing */
+};
- /* skip attached devices */
- if (dev->driver != NULL)
- return 1;
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
- return strcmp(dev->name, name);
-}
+/* The device event callback list for all registered callbacks. */
+static struct dev_event_cb_list dev_event_cbs;
+
+/* spinlock for device callbacks */
+static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
+
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
+
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
+
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
static int cmp_dev_name(const struct rte_device *dev, const void *_name)
{
@@ -89,29 +127,12 @@ int rte_eal_dev_detach(struct rte_device *dev)
return ret;
}
-static char *
-full_dev_name(const char *bus, const char *dev, const char *args)
-{
- char *name;
- size_t len;
-
- len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args) + 1;
- name = calloc(1, len);
- if (name == NULL) {
- RTE_LOG(ERR, EAL, "Could not allocate full device name\n");
- return NULL;
- }
- snprintf(name, len, "%s:%s,%s", bus, dev, args);
- return name;
-}
-
int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
const char *devargs)
{
struct rte_bus *bus;
struct rte_device *dev;
struct rte_devargs *da;
- char *name;
int ret;
bus = rte_bus_find_by_name(busname);
@@ -126,21 +147,16 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
return -ENOTSUP;
}
- name = full_dev_name(busname, devname, devargs);
- if (name == NULL)
- return -ENOMEM;
-
da = calloc(1, sizeof(*da));
- if (da == NULL) {
- ret = -ENOMEM;
- goto err_name;
- }
+ if (da == NULL)
+ return -ENOMEM;
- ret = rte_eal_devargs_parse(name, da);
+ ret = rte_devargs_parsef(da, "%s:%s,%s",
+ busname, devname, devargs);
if (ret)
goto err_devarg;
- ret = rte_eal_devargs_insert(da);
+ ret = rte_devargs_insert(da);
if (ret)
goto err_devarg;
@@ -148,30 +164,32 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
devname);
ret = -ENODEV;
goto err_devarg;
}
+ if (dev->driver != NULL) {
+ RTE_LOG(ERR, EAL, "Device is already plugged\n");
+ return -EEXIST;
+ }
+
ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
goto err_devarg;
}
- free(name);
return 0;
err_devarg:
- if (rte_eal_devargs_remove(busname, devname)) {
+ if (rte_devargs_remove(busname, devname)) {
free(da->args);
free(da);
}
-err_name:
- free(name);
return ret;
}
@@ -200,10 +218,349 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}
+ if (dev->driver == NULL) {
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n");
+ return -ENOENT;
+ }
+
ret = bus->unplug(dev);
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
dev->name);
- rte_eal_devargs_remove(busname, devname);
+ rte_devargs_remove(busname, devname);
+ return ret;
+}
+
+int __rte_experimental
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct dev_event_callback *event_cb;
+ int ret;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ if (TAILQ_EMPTY(&dev_event_cbs))
+ TAILQ_INIT(&dev_event_cbs);
+
+ TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
+ if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) {
+ if (device_name == NULL && event_cb->dev_name == NULL)
+ break;
+ if (device_name == NULL || event_cb->dev_name == NULL)
+ continue;
+ if (!strcmp(event_cb->dev_name, device_name))
+ break;
+ }
+ }
+
+ /* create a new callback. */
+ if (event_cb == NULL) {
+ event_cb = malloc(sizeof(struct dev_event_callback));
+ if (event_cb != NULL) {
+ event_cb->cb_fn = cb_fn;
+ event_cb->cb_arg = cb_arg;
+ event_cb->active = 0;
+ if (!device_name) {
+ event_cb->dev_name = NULL;
+ } else {
+ event_cb->dev_name = strdup(device_name);
+ if (event_cb->dev_name == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next);
+ } else {
+ RTE_LOG(ERR, EAL,
+ "Failed to allocate memory for device "
+				"event callback.\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+ } else {
+ RTE_LOG(ERR, EAL,
+			"The callback already exists, no need "
+			"to register again.\n");
+ ret = -EEXIST;
+ }
+
+ rte_spinlock_unlock(&dev_event_lock);
+ return 0;
+error:
+ free(event_cb);
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
+
+int __rte_experimental
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ int ret = 0;
+ struct dev_event_callback *event_cb, *next;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+	/* walk through the callbacks and remove all that match. */
+ for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
+ event_cb = next) {
+
+ next = TAILQ_NEXT(event_cb, next);
+
+ if (device_name != NULL && event_cb->dev_name != NULL) {
+ if (!strcmp(event_cb->dev_name, device_name)) {
+ if (event_cb->cb_fn != cb_fn ||
+ (cb_arg != (void *)-1 &&
+ event_cb->cb_arg != cb_arg))
+ continue;
+ }
+ } else if (device_name != NULL) {
+ continue;
+ }
+
+ /*
+ * if this callback is not executing right now,
+ * then remove it.
+ */
+ if (event_cb->active == 0) {
+ TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
+ free(event_cb);
+ ret++;
+ } else {
+ continue;
+ }
+ }
+ rte_spinlock_unlock(&dev_event_lock);
return ret;
}
+
+void
+dev_callback_process(char *device_name, enum rte_dev_event_type event)
+{
+ struct dev_event_callback *cb_lst;
+
+ if (device_name == NULL)
+ return;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
+ if (cb_lst->dev_name) {
+ if (strcmp(cb_lst->dev_name, device_name))
+ continue;
+ }
+ cb_lst->active = 1;
+ rte_spinlock_unlock(&dev_event_lock);
+ cb_lst->cb_fn(device_name, event,
+ cb_lst->cb_arg);
+ rte_spinlock_lock(&dev_event_lock);
+ cb_lst->active = 0;
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+}
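For context, a hedged usage sketch of the new device event callback API. The callback signature is inferred from the dev_callback_process() invocation above, and the RTE_DEV_EVENT_ADD / RTE_DEV_EVENT_REMOVE enumerators are assumed to be the ones declared in rte_dev.h:

    #include <stdio.h>
    #include <rte_dev.h>

    /* matches the call in dev_callback_process(): cb_fn(name, event, arg) */
    static void
    on_dev_event(const char *device_name, enum rte_dev_event_type event,
                    void *cb_arg)
    {
            (void)cb_arg;
            printf("device %s: %s\n", device_name,
                    event == RTE_DEV_EVENT_ADD ? "added" : "removed");
    }

    static int
    watch_all_devices(void)
    {
            /* a NULL device name registers the callback for every device */
            return rte_dev_event_callback_register(NULL, on_dev_event, NULL);
    }

    static int
    unwatch_all_devices(void)
    {
            return rte_dev_event_callback_unregister(NULL, on_dev_event, NULL);
    }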
+
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+	/* Having both bus_str and cls_str NULL is illegal;
+	 * pre-mark this iterator as invalid and only consider it
+	 * valid if everything goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
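A minimal consumer loop for the iterator pair above. The "bus=vdev" string is only an example of the layered syntax handled by rte_devargs_layers_parse(), and the walk returns devices only for buses that implement dev_iterate:

    #include <stdio.h>
    #include <rte_dev.h>

    static void
    list_vdev_devices(void)
    {
            struct rte_dev_iterator it;
            struct rte_device *dev;

            if (rte_dev_iterator_init(&it, "bus=vdev") != 0)
                    return;
            while ((dev = rte_dev_iterator_next(&it)) != NULL)
                    printf("found device %s\n", dev->name);
    }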
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index 810b3e18..dac2402a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -11,13 +11,22 @@
#include <stdio.h>
#include <string.h>
+#include <stdarg.h>
+#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_compat.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
+/** user device doubly-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
/** Global list of user devices */
struct rte_devargs_list devargs_list =
TAILQ_HEAD_INITIALIZER(devargs_list);
@@ -52,22 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split each sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+	/* If the devargs already points to the devstr
+	 * as its source data, it should not allocate
+	 * anything and should keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+	/* If we own the data, clean up the layer strings
+	 * a bit to ease their parsing afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
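To make the accepted format concrete, a sketch of what this parser produces for a fully layered device string. The bus and class names must already be registered for the lookups to succeed, the driver-layer content is illustrative, and the prototype is assumed to be visible to the caller (inside the EAL it is consumed by rte_dev_iterator_init() above):

    #include <string.h>
    #include <rte_devargs.h>

    static int
    parse_layers_example(void)
    {
            struct rte_devargs da;

            memset(&da, 0, sizeof(da));
            /* layers are separated by '/': bus, then class, then driver */
            if (rte_devargs_layers_parse(&da,
                            "bus=pci,addr=00:02.0/class=eth/driver=foo,key=val"))
                    return -1;
            /*
             * Expected result (illustrative): da.data owns a copy of the
             * string with each '/' replaced by '\0', and
             *   da.bus_str = "bus=pci,addr=00:02.0"   da.bus = the "pci" bus
             *   da.cls_str = "class=eth"              da.cls = the "eth" class
             *   da.drv_str = "driver=foo,key=val"
             */
            return 0;
    }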
+
static int
bus_name_cmp(const struct rte_bus *bus, const void *name)
{
return strncmp(bus->name, name, strlen(bus->name));
}
-int __rte_experimental
-rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
{
struct rte_bus *bus = NULL;
const char *devname;
const size_t maxlen = sizeof(da->name);
size_t i;
- if (dev == NULL || da == NULL)
+ if (da == NULL)
return -EINVAL;
+
/* Retrieve eventual bus info */
do {
devname = dev;
@@ -84,7 +235,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
da->name[i] = devname[i];
i++;
if (i == maxlen) {
- fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
dev, maxlen);
da->name[i - 1] = '\0';
return -EINVAL;
@@ -94,7 +245,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
if (bus == NULL) {
bus = rte_bus_find_by_device_name(da->name);
if (bus == NULL) {
- fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
da->name);
return -EFAULT;
}
@@ -106,18 +257,46 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
else
da->args = strdup("");
if (da->args == NULL) {
- fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
return -ENOMEM;
}
return 0;
}
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ return rte_devargs_parse(da, dev);
+}
+
int __rte_experimental
-rte_eal_devargs_insert(struct rte_devargs *da)
+rte_devargs_insert(struct rte_devargs *da)
{
int ret;
- ret = rte_eal_devargs_remove(da->bus->name, da->name);
+ ret = rte_devargs_remove(da->bus->name, da->name);
if (ret < 0)
return ret;
TAILQ_INSERT_TAIL(&devargs_list, da, next);
@@ -125,8 +304,9 @@ rte_eal_devargs_insert(struct rte_devargs *da)
}
/* store a whitelist parameter for later parsing */
+__rte_experimental
int
-rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
{
struct rte_devargs *devargs = NULL;
struct rte_bus *bus = NULL;
@@ -137,7 +317,7 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
if (devargs == NULL)
goto fail;
- if (rte_eal_devargs_parse(dev, devargs))
+ if (rte_devargs_parse(devargs, dev))
goto fail;
devargs->type = devtype;
bus = devargs->bus;
@@ -162,7 +342,7 @@ fail:
}
int __rte_experimental
-rte_eal_devargs_remove(const char *busname, const char *devname)
+rte_devargs_remove(const char *busname, const char *devname)
{
struct rte_devargs *d;
void *tmp;
@@ -180,8 +360,9 @@ rte_eal_devargs_remove(const char *busname, const char *devname)
}
/* count the number of devices of a specified type */
+__rte_experimental
unsigned int
-rte_eal_devargs_type_count(enum rte_devtype devtype)
+rte_devargs_type_count(enum rte_devtype devtype)
{
struct rte_devargs *devargs;
unsigned int count = 0;
@@ -195,8 +376,9 @@ rte_eal_devargs_type_count(enum rte_devtype devtype)
}
/* dump the user devices on the console */
+__rte_experimental
void
-rte_eal_devargs_dump(FILE *f)
+rte_devargs_dump(FILE *f)
{
struct rte_devargs *devargs;
@@ -207,3 +389,23 @@ rte_eal_devargs_dump(FILE *f)
devargs->name, devargs->args);
}
}
+
+/* bus-aware rte_devargs iterator. */
+__rte_experimental
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start)
+{
+ struct rte_devargs *da;
+
+ if (start != NULL)
+ da = TAILQ_NEXT(start, next);
+ else
+ da = TAILQ_FIRST(&devargs_list);
+ while (da != NULL) {
+ if (busname == NULL ||
+ (strcmp(busname, da->bus->name) == 0))
+ return da;
+ da = TAILQ_NEXT(da, next);
+ }
+ return NULL;
+}
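And a short sketch of walking the stored devargs for one bus with the new iterator; passing NULL as the bus name would visit every stored entry:

    #include <stdio.h>
    #include <rte_devargs.h>

    static void
    dump_vdev_devargs(void)
    {
            struct rte_devargs *da = NULL;

            while ((da = rte_devargs_next("vdev", da)) != NULL)
                    printf("%s: %s\n", da->name, da->args);
    }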
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 00000000..43caf3ce
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,1239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1ULL << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
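As a worked example of the index math: element 130 lives in mask word MASK_LEN_TO_IDX(130) = 130 >> 6 = 2, at bit MASK_LEN_TO_MOD(130) = 130 - 128 = 2, and MASK_GET_IDX(2, 2) = (2 << 6) + 2 reconstructs 130.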
+
+/*
+ * This is a mask that is always stored at the end of the array, to provide a
+ * fast way of finding free/used spots without looping through each element.
+ */
+
+struct used_mask {
+ unsigned int n_masks;
+ uint64_t data[];
+};
+
+static size_t
+calc_mask_size(unsigned int len)
+{
+ /* mask must be multiple of MASK_ALIGN, even though length of array
+ * itself may not be aligned on that boundary.
+ */
+ len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+ return sizeof(struct used_mask) +
+ sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
+{
+ size_t data_sz = elt_sz * len;
+ size_t msk_sz = calc_mask_size(len);
+ return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
+{
+ return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+	void *map_addr;
+
+ if (ftruncate(fd, len)) {
+		RTE_LOG(ERR, EAL, "Cannot truncate file: %s\n", strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+
+ map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr != addr) {
+ RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookahead_idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-1ULL << last_mod);
+
+ for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+ uint64_t cur_msk, lookahead_msk;
+ unsigned int run_start, clz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits for arbitrary n is
+ * a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * rshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count leading zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * end of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits right at the start, so we will do
+ * (n-k-1) rshift-ands and check if first bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* combine current ignore mask with last index ignore mask */
+ if (msk_idx == last)
+ ignore_msk |= last_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk >> 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ run_start = __builtin_ctzll(tmp_msk);
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count leading zeroes on inverted mask */
+ if (~cur_msk == 0)
+ clz = sizeof(cur_msk) * 8;
+ else
+ clz = __builtin_clzll(~cur_msk);
+
+ /* if there aren't any runs at the end either, just continue */
+ if (clz == 0)
+ continue;
+
+ /* we have a partial run at the end, so try looking ahead */
+ run_start = MASK_ALIGN - clz;
+ left -= clz;
+
+ for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+ lookahead_idx++) {
+ unsigned int s_idx, need;
+ lookahead_msk = msk->data[lookahead_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookahead_msk = ~lookahead_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookahead_msk &= lookahead_msk >> 1ULL;
+
+ /* if first bit is not set, we've lost the run */
+ if ((lookahead_msk & 1) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookahead-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = ~((1ULL << need) - 1);
+ msk_idx = lookahead_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
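The rshift-and step described in point 2 above is easy to verify in isolation: after n-1 rounds of tmp &= tmp >> 1, bit i stays set only if bits i..i+n-1 were all set, so a count-trailing-zeroes on the result yields the start of the first n-bit run. A standalone sketch (plain C, GCC/Clang builtins):

    #include <stdint.h>
    #include <stdio.h>

    /* return index of the first run of n consecutive set bits in msk, or -1;
     * n must be >= 1
     */
    static int
    first_run(uint64_t msk, unsigned int n)
    {
            uint64_t tmp = msk;
            unsigned int i;

            for (i = 0; i < n - 1; i++)
                    tmp &= tmp >> 1;
            if (tmp == 0)
                    return -1;
            return __builtin_ctzll(tmp);
    }

    int main(void)
    {
            /* bits 4..9 and 16..18 are set; first run of 5 starts at bit 4 */
            uint64_t msk = 0x703f0;

            printf("%d\n", first_run(msk, 5)); /* prints 4 */
            printf("%d\n", first_run(msk, 7)); /* prints -1, no 7-bit run */
            return 0;
    }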
+
+static int
+find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1ULL);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ for (idx = first; idx < msk->n_masks; idx++) {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find first set bit - that will correspond to whatever it is
+ * that we're looking for.
+ */
+ found = __builtin_ctzll(cur);
+ return MASK_GET_IDX(idx, found);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk;
+ unsigned int need_len, result = 0;
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* if this is last mask, ignore everything after last bit */
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first) {
+ cur >>= first_mod;
+ /* at the start, we don't need the full mask len */
+ need_len -= first_mod;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ continue;
+
+ /*
+ * see if current run ends before mask end.
+ */
+ run_len = __builtin_ctzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find the last set bit - that will correspond to whatever it is
+ * that we're looking for. we count leading zeroes here, so the
+ * value we get is measured from the top of the mask; convert it
+ * to a position counted from the start of the mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero */
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
+set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
+{
+ struct used_mask *msk;
+ uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+ unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
+ bool already_used;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ ret = 0;
+
+ /* prevent array from changing under us */
+ rte_rwlock_write_lock(&arr->rwlock);
+
+ already_used = (msk->data[msk_idx] & msk_bit) != 0;
+
+ /* nothing to be done */
+ if (used == already_used)
+ goto out;
+
+ if (used) {
+ msk->data[msk_idx] |= msk_bit;
+ arr->count++;
+ } else {
+ msk->data[msk_idx] &= ~msk_bit;
+ arr->count--;
+ }
+out:
+ rte_rwlock_write_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
+{
+ if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ struct used_mask *msk;
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (fully_validate(name, elt_sz, len))
+ return -1;
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1)
+ goto fail;
+
+ /* calculate our memory limits */
+ mmap_len = calc_data_size(page_sz, elt_sz, len);
+
+ data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
+
+ /*
+ * Each fbarray is unique to a process' namespace, i.e. the
+ * filename depends on the process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
+
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ /* we've mmap'ed the file, we can now close the fd */
+ close(fd);
+ }
+
+ /* initialize the data */
+ memset(data, 0, mmap_len);
+
+ /* populate data structure */
+ strlcpy(arr->name, name, sizeof(arr->name));
+ arr->data = data;
+ arr->len = len;
+ arr->elt_sz = elt_sz;
+ arr->count = 0;
+
+ msk = get_used_mask(data, elt_sz, len);
+ msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
+
+ rte_rwlock_init(&arr->rwlock);
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
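The lock-then-downgrade sequence above can be reproduced with plain flock() outside the EAL. A minimal sketch, assuming the caller keeps the descriptor open for as long as it needs the shared lock (closing a descriptor releases a flock):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <unistd.h>

/* become the initializer of a shared file: an exclusive lock proves nobody
 * else holds it, then a shared lock lets other processes attach while
 * preventing any of them from re-initializing it
 */
static int
claim_shared_file(const char *path)
{
	int fd = open(path, O_CREAT | O_RDWR, 0600);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_EX | LOCK_NB) < 0) {
		printf("%s is busy: %s\n", path, strerror(errno));
		close(fd);
		return -1;
	}
	/* downgrade to a shared lock; note that flock() conversion is not
	 * guaranteed to be atomic on all systems
	 */
	if (flock(fd, LOCK_SH | LOCK_NB) < 0) {
		close(fd);
		return -1;
	}
	return fd; /* caller keeps fd open to keep holding the shared lock */
}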
+
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize attach, as the two values we need
+ * (element size and array length) are constant for the lifetime of
+ * the array, so the parts we care about will not race.
+ */
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len))
+ return -1;
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1)
+ goto fail;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ /* lock the file, to let others know we're using it */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ close(fd);
+
+ /* we're done */
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr)
+{
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize detach, as the two values we need
+ * (element size and total capacity) are constant for the lifetime of
+ * the array, so the parts we care about will not race. If the user
+ * detaches while doing something else with the array in the same
+ * process, we can't really do anything about it; things will blow up
+ * either way.
+ */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ /* this may already be unmapped (e.g. a repeated call from a
+ * previously failed destroy()), but that is on the user; we can't
+ * (easily) know whether this is still mapped.
+ */
+ munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));
+
+ return 0;
+}
+
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr)
+{
+ int fd, ret;
+ char path[PATH_MAX];
+
+ ret = rte_fbarray_detach(arr);
+ if (ret)
+ return ret;
+
+ /* try deleting the file */
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+ rte_errno = EBUSY;
+ ret = -1;
+ } else {
+ ret = 0;
+ unlink(path);
+ memset(arr, 0, sizeof(*arr));
+ }
+ close(fd);
+
+ return ret;
+}
+
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
+{
+ void *ret = NULL;
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (idx >= arr->len) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, true);
+}
+
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, false);
+}
+
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ struct used_mask *msk;
+ int msk_idx;
+ uint64_t msk_bit;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ msk_idx = MASK_LEN_TO_IDX(idx);
+ msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+
+ ret = (msk->data[msk_idx] & msk_bit) != 0;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
+ }
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
+{
+ void *end;
+ int ret = -1;
+
+ /*
+ * no need to synchronize as it doesn't matter if underlying data
+ * changes - we're doing pointer arithmetic here.
+ */
+
+ if (arr == NULL || elt == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
+ if (elt < arr->data || elt >= end) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
+
+ return ret;
+}
+
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
+{
+ struct used_mask *msk;
+ unsigned int i;
+
+ if (arr == NULL || f == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
+ fprintf(f, "Invalid file-backed array\n");
+ goto out;
+ }
+
+ fprintf(f, "File-backed array: %s\n", arr->name);
+ fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
+ arr->len, arr->count, arr->elt_sz);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+
+ for (i = 0; i < msk->n_masks; i++)
+ fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+}
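Putting the pieces of the new API together, a minimal usage sketch from an application's point of view might look as follows. The array name "example_arr" and struct my_elem are made up, and an initialized EAL is assumed, since the backing file location depends on the EAL runtime configuration:

#include <stdio.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_fbarray.h>

struct my_elem {
	int value;
};

int main(int argc, char **argv)
{
	struct rte_fbarray arr;
	struct my_elem *e;
	int idx;

	if (rte_eal_init(argc, argv) < 0)
		return 1;

	/* reserve space for 1024 fixed-size elements, tracked by a bitmask */
	if (rte_fbarray_init(&arr, "example_arr", 1024,
			sizeof(struct my_elem)) < 0) {
		printf("fbarray init failed: %d\n", rte_errno);
		return 1;
	}

	/* grab the first free slot, fill it and mark it used */
	idx = rte_fbarray_find_next_free(&arr, 0);
	if (idx >= 0) {
		e = rte_fbarray_get(&arr, idx);
		e->value = 42;
		rte_fbarray_set_used(&arr, idx);
		printf("stored %d at index %d\n", e->value, idx);
		rte_fbarray_set_free(&arr, idx);
	}

	rte_fbarray_destroy(&arr);
	return 0;
}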
diff --git a/lib/librte_eal/common/eal_common_hypervisor.c b/lib/librte_eal/common/eal_common_hypervisor.c
index c3b4c621..5388b81a 100644
--- a/lib/librte_eal/common/eal_common_hypervisor.c
+++ b/lib/librte_eal/common/eal_common_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 7724fa43..3167e9d7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -7,6 +7,7 @@
#include <string.h>
#include <dirent.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_eal.h>
#include <rte_lcore.h>
@@ -16,6 +17,19 @@
#include "eal_private.h"
#include "eal_thread.h"
+static int
+socket_id_cmp(const void *a, const void *b)
+{
+ const int *lcore_id_a = a;
+ const int *lcore_id_b = b;
+
+ if (*lcore_id_a < *lcore_id_b)
+ return -1;
+ if (*lcore_id_a > *lcore_id_b)
+ return 1;
+ return 0;
+}
+
/*
* Parse /sys/devices/system/cpu to get the number of physical and logical
* processors on the machine. The function will fill the cpu_info
@@ -28,6 +42,8 @@ rte_eal_cpu_init(void)
struct rte_config *config = rte_eal_get_configuration();
unsigned lcore_id;
unsigned count = 0;
+ unsigned int socket_id, prev_socket_id;
+ int lcore_to_socket_id[RTE_MAX_LCORE];
/*
* Parse the maximum set of logical cores, detect the subset of running
@@ -39,6 +55,19 @@ rte_eal_cpu_init(void)
/* init cpuset for per lcore config */
CPU_ZERO(&lcore_config[lcore_id].cpuset);
+ /* find socket first */
+ socket_id = eal_cpu_socket_id(lcore_id);
+ if (socket_id >= RTE_MAX_NUMA_NODES) {
+#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
+ socket_id = 0;
+#else
+ RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than RTE_MAX_NUMA_NODES (%d)\n",
+ socket_id, RTE_MAX_NUMA_NODES);
+ return -1;
+#endif
+ }
+ lcore_to_socket_id[lcore_id] = socket_id;
+
/* in 1:1 mapping, record related cpu detected state */
lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id);
if (lcore_config[lcore_id].detected == 0) {
@@ -54,18 +83,7 @@ rte_eal_cpu_init(void)
config->lcore_role[lcore_id] = ROLE_RTE;
lcore_config[lcore_id].core_role = ROLE_RTE;
lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
- lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id);
- if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) {
-#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
- lcore_config[lcore_id].socket_id = 0;
-#else
- RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than "
- "RTE_MAX_NUMA_NODES (%d)\n",
- lcore_config[lcore_id].socket_id,
- RTE_MAX_NUMA_NODES);
- return -1;
-#endif
- }
+ lcore_config[lcore_id].socket_id = socket_id;
RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
"core %u on socket %u\n",
lcore_id, lcore_config[lcore_id].core_id,
@@ -79,5 +97,38 @@ rte_eal_cpu_init(void)
RTE_MAX_LCORE);
RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+ /* sort all socket id's in ascending order */
+ qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id),
+ sizeof(lcore_to_socket_id[0]), socket_id_cmp);
+
+ prev_socket_id = -1;
+ config->numa_node_count = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ socket_id = lcore_to_socket_id[lcore_id];
+ if (socket_id != prev_socket_id)
+ config->numa_nodes[config->numa_node_count++] =
+ socket_id;
+ prev_socket_id = socket_id;
+ }
+ RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count);
+
return 0;
}
+
+unsigned int __rte_experimental
+rte_socket_count(void)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ return config->numa_node_count;
+}
+
+int __rte_experimental
+rte_socket_id_by_idx(unsigned int idx)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ if (idx >= config->numa_node_count) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ return config->numa_nodes[idx];
+}
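The two accessors added above can be combined to enumerate the NUMA nodes that were detected during rte_eal_cpu_init(); a minimal sketch, assuming the usual EAL startup:

#include <stdio.h>

#include <rte_eal.h>
#include <rte_lcore.h>

int main(int argc, char **argv)
{
	unsigned int i;

	if (rte_eal_init(argc, argv) < 0)
		return 1;

	/* numa_nodes[] was sorted and de-duplicated during CPU init */
	for (i = 0; i < rte_socket_count(); i++)
		printf("socket index %u -> socket id %d\n",
				i, rte_socket_id_by_idx(i));

	return 0;
}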
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 37b2e20e..c714a4bd 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <errno.h>
#include <regex.h>
+#include <fnmatch.h>
#include <rte_eal.h>
#include <rte_log.h>
@@ -23,6 +24,23 @@ struct rte_logs rte_logs = {
.file = NULL,
};
+struct rte_eal_opt_loglevel {
+ /** Next list entry */
+ TAILQ_ENTRY(rte_eal_opt_loglevel) next;
+ /** Compiled regular expression obtained from the option */
+ regex_t re_match;
+ /** Glob match string option */
+ char *pattern;
+ /** Log level value obtained from the option */
+ uint32_t level;
+};
+
+TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel);
+
+/** List of valid EAL log level options */
+static struct rte_eal_opt_loglevel_list opt_loglevel_list =
+ TAILQ_HEAD_INITIALIZER(opt_loglevel_list);
+
/* Stream to use for logging if rte_logs.file is NULL */
static FILE *default_log_stream;
@@ -89,9 +107,9 @@ rte_log_set_level(uint32_t type, uint32_t level)
return 0;
}
-/* set level */
+/* set log level by regular expression */
int
-rte_log_set_level_regexp(const char *pattern, uint32_t level)
+rte_log_set_level_regexp(const char *regex, uint32_t level)
{
regex_t r;
size_t i;
@@ -99,7 +117,7 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level)
if (level > RTE_LOG_DEBUG)
return -1;
- if (regcomp(&r, pattern, 0) != 0)
+ if (regcomp(&r, regex, 0) != 0)
return -1;
for (i = 0; i < rte_logs.dynamic_types_len; i++) {
@@ -115,6 +133,69 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level)
return 0;
}
+/*
+ * Save the compiled regex or the glob pattern and the log level, so that
+ * dynamic log types registered later can pick them up.
+ */
+static int rte_log_save_level(int priority,
+ const char *regex, const char *pattern)
+{
+ struct rte_eal_opt_loglevel *opt_ll = NULL;
+
+ opt_ll = malloc(sizeof(*opt_ll));
+ if (opt_ll == NULL)
+ goto fail;
+
+ opt_ll->level = priority;
+
+ if (regex) {
+ opt_ll->pattern = NULL;
+ if (regcomp(&opt_ll->re_match, regex, 0) != 0)
+ goto fail;
+ } else if (pattern) {
+ opt_ll->pattern = strdup(pattern);
+ if (opt_ll->pattern == NULL)
+ goto fail;
+ } else
+ goto fail;
+
+ TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next);
+ return 0;
+fail:
+ free(opt_ll);
+ return -1;
+}
+
+int rte_log_save_regexp(const char *regex, int tmp)
+{
+ return rte_log_save_level(tmp, regex, NULL);
+}
+
+/* set log level based on glob (file match) pattern */
+int
+rte_log_set_level_pattern(const char *pattern, uint32_t level)
+{
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+
+ if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ return 0;
+}
+
+int rte_log_save_pattern(const char *pattern, int priority)
+{
+ return rte_log_save_level(priority, NULL, pattern);
+}
+
/* get the current loglevel for the message being processed */
int rte_log_cur_msg_loglevel(void)
{
@@ -186,6 +267,36 @@ rte_log_register(const char *name)
return ret;
}
+/* Register an extended log type and try to pick its level from EAL options */
+int __rte_experimental
+rte_log_register_type_and_pick_level(const char *name, uint32_t level_def)
+{
+ struct rte_eal_opt_loglevel *opt_ll;
+ uint32_t level = level_def;
+ int type;
+
+ type = rte_log_register(name);
+ if (type < 0)
+ return type;
+
+ TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) {
+ if (opt_ll->level > RTE_LOG_DEBUG)
+ continue;
+
+ if (opt_ll->pattern) {
+ if (fnmatch(opt_ll->pattern, name, 0) == 0)
+ level = opt_ll->level;
+ } else {
+ if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0)
+ level = opt_ll->level;
+ }
+ }
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return type;
+}
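For a library or application, the intended use of the function above is to register its log type once EAL option parsing has stored any --log-level patterns, and fall back to a default level otherwise. A sketch; the log type name "example.app" and the fallback to RTE_LOGTYPE_USER1 are arbitrary choices:

#include <rte_log.h>

static int example_logtype = -1;

/* call once after rte_eal_init(), so that a saved option such as
 * --log-level=example.*:debug is taken into account
 */
static void
example_log_init(void)
{
	example_logtype = rte_log_register_type_and_pick_level("example.app",
			RTE_LOG_NOTICE);
	if (example_logtype < 0)
		example_logtype = RTE_LOGTYPE_USER1;
}

static void
example_log_hello(void)
{
	rte_log(RTE_LOG_INFO, example_logtype, "hello from example.app\n");
}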
+
struct logtype {
uint32_t log_id;
const char *logtype;
@@ -224,9 +335,7 @@ static const struct logtype logtype_strings[] = {
};
/* Logging should be first initializer (before drivers and bus) */
-RTE_INIT_PRIO(rte_log_init, 101);
-static void
-rte_log_init(void)
+RTE_INIT_PRIO(rte_log_init, LOG)
{
uint32_t i;
diff --git a/lib/librte_eal/common/eal_common_memalloc.c b/lib/librte_eal/common/eal_common_memalloc.c
new file mode 100644
index 00000000..1d41ea11
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memalloc.c
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_fbarray.h>
+#include <rte_memzone.h>
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_rwlock.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+
+struct mem_event_callback_entry {
+ TAILQ_ENTRY(mem_event_callback_entry) next;
+ char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN];
+ rte_mem_event_callback_t clb;
+ void *arg;
+};
+
+struct mem_alloc_validator_entry {
+ TAILQ_ENTRY(mem_alloc_validator_entry) next;
+ char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN];
+ rte_mem_alloc_validator_t clb;
+ int socket_id;
+ size_t limit;
+};
+
+/** Double linked list of actions. */
+TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry);
+TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry);
+
+static struct mem_event_callback_entry_list mem_event_callback_list =
+ TAILQ_HEAD_INITIALIZER(mem_event_callback_list);
+static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_alloc_validator_entry_list mem_alloc_validator_list =
+ TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list);
+static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_event_callback_entry *
+find_mem_event_callback(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *r;
+
+ TAILQ_FOREACH(r, &mem_event_callback_list, next) {
+ if (!strcmp(r->name, name) && r->arg == arg)
+ break;
+ }
+ return r;
+}
+
+static struct mem_alloc_validator_entry *
+find_mem_alloc_validator(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *r;
+
+ TAILQ_FOREACH(r, &mem_alloc_validator_list, next) {
+ if (!strcmp(r->name, name) && r->socket_id == socket_id)
+ break;
+ }
+ return r;
+}
+
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len)
+{
+ void *end, *aligned_start, *aligned_end;
+ size_t pgsz = (size_t)msl->page_sz;
+ const struct rte_memseg *ms;
+
+ /* for IOVA_VA, it's always contiguous */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return true;
+
+ /* for legacy memory, it's always contiguous */
+ if (internal_config.legacy_mem)
+ return true;
+
+ end = RTE_PTR_ADD(start, len);
+
+ /* for nohuge, we check pagemap, otherwise check memseg */
+ if (!rte_eal_has_hugepages()) {
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ cur = rte_mem_virt2iova(aligned_start);
+ expected = cur + pgsz;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+
+ while (aligned_start < aligned_end) {
+ cur = rte_mem_virt2iova(aligned_start);
+ if (cur != expected)
+ return false;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+ expected += pgsz;
+ }
+ } else {
+ int start_seg, end_seg, cur_seg;
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) /
+ pgsz;
+ end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) /
+ pgsz;
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ ms = rte_fbarray_get(&msl->memseg_arr, start_seg);
+ cur = ms->iova;
+ expected = cur + pgsz;
+
+ /* if we can't access IOVA addresses, assume non-contiguous */
+ if (cur == RTE_BAD_IOVA)
+ return false;
+
+ for (cur_seg = start_seg + 1; cur_seg < end_seg;
+ cur_seg++, expected += pgsz) {
+ ms = rte_fbarray_get(&msl->memseg_arr, cur_seg);
+
+ if (ms->iova != expected)
+ return false;
+ }
+ }
+ return true;
+}
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->arg = arg;
+ strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+
+ if (name == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_event_callback_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len)
+{
+ struct mem_event_callback_entry *entry;
+
+ rte_rwlock_read_lock(&mem_event_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_event_callback_list, next) {
+ RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n",
+ entry->name, entry->arg);
+ entry->clb(event, start, len, entry->arg);
+ }
+
+ rte_rwlock_read_unlock(&mem_event_rwlock);
+}
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->socket_id = socket_id;
+ entry->limit = limit;
+ strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n",
+ name, socket_id, limit);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+
+ if (name == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_alloc_validator_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n",
+ name, socket_id);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret = 0;
+
+ rte_rwlock_read_lock(&mem_alloc_validator_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) {
+ if (entry->socket_id != socket_id || entry->limit > new_len)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n",
+ entry->name, entry->socket_id);
+ if (entry->clb(socket_id, entry->limit, new_len) < 0)
+ ret = -1;
+ }
+
+ rte_rwlock_read_unlock(&mem_alloc_validator_rwlock);
+
+ return ret;
+}
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 852f3bb9..fbfb1b05 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -2,82 +2,385 @@
* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/queue.h>
+#include <rte_fbarray.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
+#include "eal_memalloc.h"
#include "eal_private.h"
#include "eal_internal_cfg.h"
/*
- * Return a pointer to a read-only table of struct rte_physmem_desc
- * elements, containing the layout of all addressable physical
- * memory. The last element of the table contains a NULL address.
+ * Try to mmap *size bytes of anonymous memory. If successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Otherwise, if
+ * shrinking is allowed, retry with a smaller area, decreasing *size by
+ * page_sz until it reaches 0, and return NULL in that case. Note: unless
+ * alignment is skipped, this function returns an address aligned to page_sz.
*/
-const struct rte_memseg *
-rte_eal_get_physmem_layout(void)
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+
+static void *next_baseaddr;
+static uint64_t system_page_sz;
+
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags)
+{
+ bool addr_is_hint, allow_shrink, unmap, no_align;
+ uint64_t map_sz;
+ void *mapped_addr, *aligned_addr;
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+ addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
+ allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
+ unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
+
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
+ requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
+ addr_is_hint = true;
+ }
+
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system page size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
+ */
+ no_align = (requested_addr != NULL &&
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
+ page_sz == system_page_sz;
+
+ do {
+ map_sz = no_align ? *size : *size + page_sz;
+ if (map_sz > SIZE_MAX) {
+ RTE_LOG(ERR, EAL, "Map size too big\n");
+ rte_errno = E2BIG;
+ return NULL;
+ }
+
+ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
+ mmap_flags, -1, 0);
+ if (mapped_addr == MAP_FAILED && allow_shrink)
+ *size -= page_sz;
+ } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+ /* align resulting address - if map failed, we will ignore the value
+ * anyway, so no need to add additional checks.
+ */
+ aligned_addr = no_align ? mapped_addr :
+ RTE_PTR_ALIGN(mapped_addr, page_sz);
+
+ if (*size == 0) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ return NULL;
+ } else if (mapped_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+ strerror(errno));
+ /* pass errno up the call chain */
+ rte_errno = errno;
+ return NULL;
+ } else if (requested_addr != NULL && !addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
+ requested_addr, aligned_addr);
+ munmap(mapped_addr, map_sz);
+ rte_errno = EADDRNOTAVAIL;
+ return NULL;
+ } else if (requested_addr != NULL && addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
+ requested_addr, aligned_addr);
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ aligned_addr, *size);
+
+ if (unmap) {
+ munmap(mapped_addr, map_sz);
+ } else if (!no_align) {
+ void *map_end, *aligned_end;
+ size_t before_len, after_len;
+
+ /* when we reserve space with alignment, we add alignment to
+ * mapping size. On 32-bit, if 1GB alignment was requested, this
+ * would waste 1GB of address space, which is a luxury we cannot
+ * afford. So, if alignment was performed, check whether any unneeded
+ * address space can be unmapped.
+ */
+
+ map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
+ aligned_end = RTE_PTR_ADD(aligned_addr, *size);
+
+ /* unmap space before aligned mmap address */
+ before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
+ if (before_len > 0)
+ munmap(mapped_addr, before_len);
+
+ /* unmap space after aligned end mmap address */
+ after_len = RTE_PTR_DIFF(map_end, aligned_end);
+ if (after_len > 0)
+ munmap(aligned_end, after_len);
+ }
+
+ return aligned_addr;
+}
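The reserve-larger-then-trim technique used above for alignment can be shown in isolation with plain mmap(); this is only a sketch of the idea, not the EAL code path (no flags, no shrinking, no hint handling):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* reserve len bytes of address space aligned to align (a power of two) */
static void *
reserve_aligned(size_t len, size_t align)
{
	size_t map_sz = len + align;
	char *base, *aligned, *map_end, *aligned_end;

	base = mmap(NULL, map_sz, PROT_READ,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return NULL;

	/* round the returned address up to the requested alignment */
	aligned = (char *)(((uintptr_t)base + align - 1) & ~(align - 1));
	map_end = base + map_sz;
	aligned_end = aligned + len;

	/* give back the slack before and after the aligned window */
	if (aligned > base)
		munmap(base, aligned - base);
	if (map_end > aligned_end)
		munmap(aligned_end, map_end - aligned_end);

	return aligned;
}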
+
+static struct rte_memseg *
+virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ const struct rte_fbarray *arr;
+ void *start, *end;
+ int ms_idx;
+
+ if (msl == NULL)
+ return NULL;
+
+ /* a memseg list was specified, check if it's the right one */
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
+
+ if (addr < start || addr >= end)
+ return NULL;
+
+ /* now, calculate index */
+ arr = &msl->memseg_arr;
+ ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
+ return rte_fbarray_get(arr, ms_idx);
+}
+
+static struct rte_memseg_list *
+virt2memseg_list(const void *addr)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ int msl_idx;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ void *start, *end;
+ msl = &mcfg->memsegs[msl_idx];
+
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start,
+ (size_t)msl->page_sz * msl->memseg_arr.len);
+ if (addr >= start && addr < end)
+ break;
+ }
+ /* if we didn't find our memseg list */
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS)
+ return NULL;
+ return msl;
+}
+
+__rte_experimental struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *addr)
+{
+ return virt2memseg_list(addr);
+}
+
+struct virtiova {
+ rte_iova_t iova;
+ void *virt;
+};
+static int
+find_virt(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+static int
+find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, size_t len, void *arg)
{
- return rte_eal_get_configuration()->mem_config->memseg;
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
}
+__rte_experimental void *
+rte_mem_iova2virt(rte_iova_t iova)
+{
+ struct virtiova vi;
+
+ memset(&vi, 0, sizeof(vi));
+
+ vi.iova = iova;
+ /* for legacy mem, we can get away with scanning VA-contiguous segments,
+ * as we know they are PA-contiguous as well
+ */
+ if (internal_config.legacy_mem)
+ rte_memseg_contig_walk(find_virt_legacy, &vi);
+ else
+ rte_memseg_walk(find_virt, &vi);
+
+ return vi.virt;
+}
+
+__rte_experimental struct rte_memseg *
+rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ return virt2memseg(addr, msl != NULL ? msl :
+ rte_mem_virt2memseg_list(addr));
+}
+
+static int
+physmem_size(const struct rte_memseg_list *msl, void *arg)
+{
+ uint64_t *total_len = arg;
+
+ *total_len += msl->memseg_arr.count * msl->page_sz;
+
+ return 0;
+}
/* get the total size of memory */
uint64_t
rte_eal_get_physmem_size(void)
{
- const struct rte_mem_config *mcfg;
- unsigned i = 0;
uint64_t total_len = 0;
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
+ rte_memseg_list_walk(physmem_size, &total_len);
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if (mcfg->memseg[i].addr == NULL)
- break;
+ return total_len;
+}
- total_len += mcfg->memseg[i].len;
- }
+static int
+dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx, ms_idx;
+ FILE *f = arg;
- return total_len;
+ msl_idx = msl - mcfg->memsegs;
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+ if (ms_idx < 0)
+ return -1;
+
+ fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32"\n",
+ msl_idx, ms_idx,
+ ms->iova,
+ ms->len,
+ ms->addr,
+ ms->socket_id,
+ ms->hugepage_sz,
+ ms->nchannel,
+ ms->nrank);
+
+ return 0;
}
-/* Dump the physical memory layout on console */
-void
-rte_dump_physmem_layout(FILE *f)
+/*
+ * Defined here because it is declared in rte_memory.h, but the actual
+ * implementation is in eal_common_memalloc.c, like all other memalloc
+ * internals.
+ */
+int __rte_experimental
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg)
{
- const struct rte_mem_config *mcfg;
- unsigned i = 0;
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_register(name, clb, arg);
+}
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
+int __rte_experimental
+rte_mem_event_callback_unregister(const char *name, void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_unregister(name, arg);
+}
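A sketch of how an application might use the wrappers above. The rte_mem_event_callback_t shape and the RTE_MEM_EVENT_ALLOC/RTE_MEM_EVENT_FREE enumerators are assumed from rte_memory.h (the four-argument callback matches the notify loop in eal_common_memalloc.c above), and the callback name string is arbitrary:

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>

/* invoked from the EAL memory allocation/free paths; it should not
 * itself allocate or free DPDK memory
 */
static void
example_mem_event_cb(enum rte_mem_event event, const void *addr, size_t len,
		void *arg __rte_unused)
{
	RTE_LOG(INFO, USER1, "mem %s: %p, len %zu\n",
		event == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", addr, len);
}

static int
example_register_mem_event_cb(void)
{
	/* fails with rte_errno == ENOTSUP when legacy memory mode is used */
	return rte_mem_event_callback_register("example-mem-cb",
			example_mem_event_cb, NULL);
}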
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if (mcfg->memseg[i].addr == NULL)
- break;
+int __rte_experimental
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
+ limit);
+}
- fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, "
- "virt:%p, socket_id:%"PRId32", "
- "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
- "nrank:%"PRIx32"\n", i,
- mcfg->memseg[i].iova,
- mcfg->memseg[i].len,
- mcfg->memseg[i].addr,
- mcfg->memseg[i].socket_id,
- mcfg->memseg[i].hugepage_sz,
- mcfg->memseg[i].nchannel,
- mcfg->memseg[i].nrank);
+int __rte_experimental
+rte_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
}
+ return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
+}
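Similarly, an allocation validator can veto growth of a socket's memory beyond a chosen limit. The prototype below is inferred from the internal call entry->clb(socket_id, entry->limit, new_len) in eal_common_memalloc.c above, so treat it as an assumption; the name and the 1 GB limit are arbitrary:

#include <rte_common.h>
#include <rte_memory.h>

/* only called once total allocated memory on the socket reaches the
 * registered limit; returning a negative value rejects the allocation
 */
static int
example_mem_validator(int socket_id __rte_unused, size_t cur_limit,
		size_t new_len)
{
	return new_len > cur_limit ? -1 : 0;
}

static int
example_register_mem_validator(void)
{
	return rte_mem_alloc_validator_register("example-limit",
			example_mem_validator, 0, (size_t)1 << 30);
}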
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+ rte_memseg_walk(dump_memseg, f);
}
/* return the number of memory channels */
@@ -117,20 +420,165 @@ rte_mem_lock_page(const void *virt)
return mlock((void *)aligned, page_size);
}
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ int n_segs;
+ size_t len;
+
+ ms = rte_fbarray_get(arr, ms_idx);
+
+ /* find how many more segments there are, starting with
+ * this one.
+ */
+ n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
+ len = n_segs * msl->page_sz;
+
+ ret = func(msl, ms, len, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr,
+ ms_idx + n_segs);
+ }
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ ms = rte_fbarray_get(arr, ms_idx);
+ ret = func(msl, ms, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
+ }
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
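A small sketch of the walk API defined above: the callback signature matches the one used by dump_memseg() and find_virt() in this file, and since the walk holds the hotplug read lock for its duration, the callback must not allocate or free DPDK memory:

#include <stdio.h>

#include <rte_common.h>
#include <rte_memory.h>

struct seg_stats {
	unsigned int n_segs;
	size_t total_len;
};

static int
count_seg(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)
{
	struct seg_stats *stats = arg;

	stats->n_segs++;
	stats->total_len += ms->len;
	return 0; /* a non-zero return value would stop the walk early */
}

static void
print_seg_stats(void)
{
	struct seg_stats stats = { 0, 0 };

	rte_memseg_walk(count_seg, &stats);
	printf("%u segments, %zu bytes mapped\n",
			stats.n_segs, stats.total_len);
}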
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->base_va == NULL)
+ continue;
+
+ ret = func(msl, arg);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
/* init memory subsystem */
int
rte_eal_memory_init(void)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int retval;
RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
- const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ if (!mcfg)
+ return -1;
+
+ /* lock mem hotplug here, to prevent races while we init */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ if (rte_eal_memseg_init() < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
rte_eal_hugepage_init() :
rte_eal_hugepage_attach();
if (retval < 0)
- return -1;
+ goto fail;
if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
- return -1;
+ goto fail;
return 0;
+fail:
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ return -1;
}
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 1ab3ade2..7300fe05 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -28,88 +28,49 @@
static inline const struct rte_memzone *
memzone_lookup_thread_unsafe(const char *name)
{
- const struct rte_mem_config *mcfg;
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
const struct rte_memzone *mz;
- unsigned i = 0;
+ int i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
/*
* the algorithm is not optimal (linear), but there are few
* zones and this function should be called at init only
*/
- for (i = 0; i < RTE_MAX_MEMZONE; i++) {
- mz = &mcfg->memzone[i];
- if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
- return &mcfg->memzone[i];
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ mz = rte_fbarray_get(arr, i);
+ if (mz->addr != NULL &&
+ !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+ return mz;
+ i = rte_fbarray_find_next_used(arr, i + 1);
}
-
- return NULL;
-}
-
-static inline struct rte_memzone *
-get_next_free_memzone(void)
-{
- struct rte_mem_config *mcfg;
- unsigned i = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr == NULL)
- return &mcfg->memzone[i];
- }
-
return NULL;
}
-/* This function will return the greatest free block if a heap has been
- * specified. If no heap has been specified, it will return the heap and
- * length of the greatest free block available in all heaps */
-static size_t
-find_heap_max_free_elem(int *s, unsigned align)
-{
- struct rte_mem_config *mcfg;
- struct rte_malloc_socket_stats stats;
- int i, socket = *s;
- size_t len = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if ((socket != SOCKET_ID_ANY) && (socket != i))
- continue;
-
- malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
- if (stats.greatest_free_size > len) {
- len = stats.greatest_free_size;
- *s = i;
- }
- }
-
- if (len < MALLOC_ELEM_OVERHEAD + align)
- return 0;
-
- return len - MALLOC_ELEM_OVERHEAD - align;
-}
-
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
- int socket_id, unsigned flags, unsigned align, unsigned bound)
+ int socket_id, unsigned int flags, unsigned int align,
+ unsigned int bound)
{
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ void *mz_addr;
size_t requested_len;
- int socket, i;
+ int mz_idx;
+ bool contig;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
/* no more room in config */
- if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+ if (arr->count >= arr->len) {
RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
rte_errno = ENOSPC;
return NULL;
@@ -148,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- len += RTE_CACHE_LINE_MASK;
- len &= ~((size_t) RTE_CACHE_LINE_MASK);
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
/* save minimal requested length */
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
@@ -169,40 +129,22 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
if (!rte_eal_has_hugepages())
socket_id = SOCKET_ID_ANY;
- if (len == 0) {
- if (bound != 0)
- requested_len = bound;
- else {
- requested_len = find_heap_max_free_elem(&socket_id, align);
- if (requested_len == 0) {
- rte_errno = ENOMEM;
- return NULL;
- }
- }
- }
+ contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
+ /* malloc only cares about size flags, remove contig flag from flags */
+ flags &= ~RTE_MEMZONE_IOVA_CONTIG;
- if (socket_id == SOCKET_ID_ANY)
- socket = malloc_get_numa_socket();
- else
- socket = socket_id;
-
- /* allocate memory on heap */
- void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
- requested_len, flags, align, bound);
-
- if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
- /* try other heaps */
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if (socket == i)
- continue;
-
- mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
- NULL, requested_len, flags, align, bound);
- if (mz_addr != NULL)
- break;
- }
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
+ requested_len = bound;
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
}
-
if (mz_addr == NULL) {
rte_errno = ENOMEM;
return NULL;
@@ -211,33 +153,38 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
/* fill the zone in config */
- mz = get_next_free_memzone();
+ mz_idx = rte_fbarray_find_next_free(arr, 0);
+
+ if (mz_idx < 0) {
+ mz = NULL;
+ } else {
+ rte_fbarray_set_used(arr, mz_idx);
+ mz = rte_fbarray_get(arr, mz_idx);
+ }
if (mz == NULL) {
- RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
- "in config!\n", __func__);
- malloc_elem_free(elem);
+ RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
+ malloc_heap_free(elem);
rte_errno = ENOSPC;
return NULL;
}
- mcfg->memzone_cnt++;
snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
- mz->len = (requested_len == 0 ? elem->size : requested_len);
- mz->hugepage_sz = elem->ms->hugepage_sz;
- mz->socket_id = elem->ms->socket_id;
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
+ mz->hugepage_sz = elem->msl->page_sz;
+ mz->socket_id = elem->msl->socket_id;
mz->flags = 0;
- mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
return mz;
}
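With the flag handling above, a caller that needs physically contiguous backing (e.g. for a hardware ring) can ask for it explicitly; a minimal sketch, with an arbitrary zone name and size:

#include <stdio.h>

#include <rte_errno.h>
#include <rte_memzone.h>

static const struct rte_memzone *
example_reserve_dma_zone(void)
{
	const struct rte_memzone *mz;

	/* 2 MB zone that must be IOVA-contiguous, on any socket */
	mz = rte_memzone_reserve("example_dma_zone", 2 * 1024 * 1024,
			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG);
	if (mz == NULL)
		printf("memzone reservation failed: %d\n", rte_errno);
	return mz;
}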
static const struct rte_memzone *
-rte_memzone_reserve_thread_safe(const char *name, size_t len,
- int socket_id, unsigned flags, unsigned align,
- unsigned bound)
+rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound)
{
struct rte_mem_config *mcfg;
const struct rte_memzone *mz = NULL;
@@ -296,34 +243,38 @@ int
rte_memzone_free(const struct rte_memzone *mz)
{
struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ struct rte_memzone *found_mz;
int ret = 0;
- void *addr;
+ void *addr = NULL;
unsigned idx;
if (mz == NULL)
return -EINVAL;
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
rte_rwlock_write_lock(&mcfg->mlock);
- idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
- idx = idx / sizeof(struct rte_memzone);
+ idx = rte_fbarray_find_idx(arr, mz);
+ found_mz = rte_fbarray_get(arr, idx);
- addr = mcfg->memzone[idx].addr;
- if (addr == NULL)
+ if (found_mz == NULL) {
+ ret = -EINVAL;
+ } else if (found_mz->addr == NULL) {
+ RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
ret = -EINVAL;
- else if (mcfg->memzone_cnt == 0) {
- rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
- __func__);
} else {
- memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
- mcfg->memzone_cnt--;
+ addr = found_mz->addr;
+ memset(found_mz, 0, sizeof(*found_mz));
+ rte_fbarray_set_free(arr, idx);
}
rte_rwlock_write_unlock(&mcfg->mlock);
- rte_free(addr);
+ if (addr != NULL)
+ rte_free(addr);
return ret;
}
@@ -348,31 +299,61 @@ rte_memzone_lookup(const char *name)
return memzone;
}
+static void
+dump_memzone(const struct rte_memzone *mz, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl = NULL;
+ void *cur_addr, *mz_end;
+ struct rte_memseg *ms;
+ int mz_idx, ms_idx;
+ size_t page_sz;
+ FILE *f = arg;
+
+ mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
+
+ fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, "
+ "socket_id:%"PRId32", flags:%"PRIx32"\n",
+ mz_idx,
+ mz->name,
+ mz->len,
+ mz->addr,
+ mz->socket_id,
+ mz->flags);
+
+ /* go through each page occupied by this memzone */
+ msl = rte_mem_virt2memseg_list(mz->addr);
+ if (!msl) {
+ RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
+ return;
+ }
+ page_sz = (size_t)mz->hugepage_sz;
+ cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
+ mz_end = RTE_PTR_ADD(cur_addr, mz->len);
+
+ fprintf(f, "physical segments used:\n");
+ ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
+ ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
+
+ do {
+ fprintf(f, " addr: %p iova: 0x%" PRIx64 " "
+ "len: 0x%zx "
+ "pagesz: 0x%zx\n",
+ cur_addr, ms->iova, ms->len, page_sz);
+
+ /* advance VA to next page */
+ cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
+
+ /* memzones occupy contiguous segments */
+ ++ms;
+ } while (cur_addr < mz_end);
+}
+
/* Dump all reserved memory zones on console */
void
rte_memzone_dump(FILE *f)
{
- struct rte_mem_config *mcfg;
- unsigned i = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- rte_rwlock_read_lock(&mcfg->mlock);
- /* dump all zones */
- for (i=0; i<RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr == NULL)
- break;
- fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx"
- ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
- mcfg->memzone[i].name,
- mcfg->memzone[i].iova,
- mcfg->memzone[i].len,
- mcfg->memzone[i].addr,
- mcfg->memzone[i].socket_id,
- mcfg->memzone[i].flags);
- }
- rte_rwlock_read_unlock(&mcfg->mlock);
+ rte_memzone_walk(dump_memzone, f);
}
/*
@@ -382,30 +363,27 @@ int
rte_eal_memzone_init(void)
{
struct rte_mem_config *mcfg;
- const struct rte_memseg *memseg;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
- /* secondary processes don't need to initialise anything */
- if (rte_eal_process_type() == RTE_PROC_SECONDARY)
- return 0;
+ rte_rwlock_write_lock(&mcfg->mlock);
- memseg = rte_eal_get_physmem_layout();
- if (memseg == NULL) {
- RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ rte_fbarray_init(&mcfg->memzones, "memzone",
+ RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
+ return -1;
+ } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+ rte_fbarray_attach(&mcfg->memzones)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
+ rte_rwlock_write_unlock(&mcfg->mlock);
return -1;
}
- rte_rwlock_write_lock(&mcfg->mlock);
-
- /* delete all zones */
- mcfg->memzone_cnt = 0;
- memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
-
rte_rwlock_write_unlock(&mcfg->mlock);
- return rte_eal_malloc_heap_init();
+ return 0;
}
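The init path above boils down to the usual fbarray create-or-attach split between primary and secondary processes. A condensed, hypothetical sketch of that pattern (array name and element type are made up):

#include <rte_eal.h>
#include <rte_fbarray.h>

static int
shared_array_init(struct rte_fbarray *arr)
{
	/* primary creates the backing file, secondaries map the same one */
	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		return rte_fbarray_init(arr, "example_array", 1024,
				sizeof(int));
	return rte_fbarray_attach(arr);
}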
/* Walk all reserved memory zones */
@@ -413,14 +391,18 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
void *arg)
{
struct rte_mem_config *mcfg;
- unsigned i;
+ struct rte_fbarray *arr;
+ int i;
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
rte_rwlock_read_lock(&mcfg->mlock);
- for (i=0; i<RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr != NULL)
- (*func)(&mcfg->memzone[i], arg);
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ struct rte_memzone *mz = rte_fbarray_get(arr, i);
+ (*func)(mz, arg);
+ i = rte_fbarray_find_next_used(arr, i + 1);
}
rte_rwlock_read_unlock(&mcfg->mlock);
}
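rte_memzone_dump() above is now just a walk callback, and the same public walker can be reused for other bookkeeping. A small illustrative example — the counting callback is an assumption, not part of the patch:

#include <rte_common.h>
#include <rte_memzone.h>

static void
count_zone(const struct rte_memzone *mz __rte_unused, void *arg)
{
	(*(unsigned int *)arg)++;
}

static unsigned int
count_memzones(void)
{
	unsigned int n = 0;

	rte_memzone_walk(count_zone, &n);
	return n;
}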
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d25..dd5f9740 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -27,6 +27,7 @@
#include "eal_internal_cfg.h"
#include "eal_options.h"
#include "eal_filesystem.h"
+#include "eal_private.h"
#define BITS_PER_HEX 4
#define LCORE_OPT_LST 1
@@ -65,14 +66,18 @@ eal_long_options[] = {
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
{OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
{OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
{OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
+ {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM },
+ {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM},
{0, 0, NULL, 0 }
};
@@ -151,7 +156,7 @@ eal_option_device_parse(void)
TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) {
if (ret == 0) {
- ret = rte_eal_devargs_add(devopt->type, devopt->arg);
+ ret = rte_devargs_add(devopt->type, devopt->arg);
if (ret)
RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n",
devopt->arg);
@@ -176,9 +181,16 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
/* zero out hugedir descriptors */
- for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+ memset(&internal_cfg->hugepage_info[i], 0,
+ sizeof(internal_cfg->hugepage_info[0]));
internal_cfg->hugepage_info[i].lock_descriptor = -1;
+ }
internal_cfg->base_virtaddr = 0;
internal_cfg->syslog_facility = LOG_DAEMON;
@@ -194,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
internal_cfg->user_mbuf_pool_ops_name = NULL;
+ internal_cfg->init_complete = 0;
}
static int
@@ -308,6 +321,7 @@ eal_parse_service_coremask(const char *coremask)
unsigned int count = 0;
char c;
int val;
+ uint32_t taken_lcore_count = 0;
if (coremask == NULL)
return -1;
@@ -341,7 +355,7 @@ eal_parse_service_coremask(const char *coremask)
if (master_lcore_parsed &&
cfg->master_lcore == lcore) {
RTE_LOG(ERR, EAL,
- "Error: lcore %u is master lcore, cannot use as service core\n",
+ "lcore %u is master lcore, cannot use as service core\n",
idx);
return -1;
}
@@ -351,6 +365,10 @@ eal_parse_service_coremask(const char *coremask)
"lcore %u unavailable\n", idx);
return -1;
}
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role = ROLE_SERVICE;
count++;
}
@@ -367,11 +385,28 @@ eal_parse_service_coremask(const char *coremask)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
cfg->service_lcore_count = count;
return 0;
}
static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -380,6 +415,11 @@ eal_parse_coremask(const char *coremask)
char c;
int val;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -c is before -s or -S\n");
+
if (coremask == NULL)
return -1;
/* Remove all blank characters ahead and after .
@@ -411,6 +451,7 @@ eal_parse_coremask(const char *coremask)
"unavailable\n", idx);
return -1;
}
+
cfg->lcore_role[idx] = ROLE_RTE;
lcore_config[idx].core_index = count;
count++;
@@ -442,6 +483,7 @@ eal_parse_service_corelist(const char *corelist)
unsigned count = 0;
char *end = NULL;
int min, max;
+ uint32_t taken_lcore_count = 0;
if (corelist == NULL)
return -1;
@@ -483,6 +525,9 @@ eal_parse_service_corelist(const char *corelist)
idx);
return -1;
}
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role =
ROLE_SERVICE;
count++;
@@ -497,6 +542,12 @@ eal_parse_service_corelist(const char *corelist)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
return 0;
}
@@ -509,6 +560,11 @@ eal_parse_corelist(const char *corelist)
char *end = NULL;
int min, max;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -l is before -s or -S\n");
+
if (corelist == NULL)
return -1;
@@ -583,7 +639,8 @@ eal_parse_master_lcore(const char *arg)
/* ensure master core is not used as service core */
if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
- RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
return -1;
}
@@ -875,7 +932,7 @@ static int
eal_parse_syslog(const char *facility, struct internal_config *conf)
{
int i;
- static struct {
+ static const struct {
const char *name;
int value;
} map[] = {
@@ -911,43 +968,92 @@ eal_parse_syslog(const char *facility, struct internal_config *conf)
}
static int
-eal_parse_log_level(const char *arg)
+eal_parse_log_priority(const char *level)
{
- char *end, *str, *type, *level;
+ static const char * const levels[] = {
+ [RTE_LOG_EMERG] = "emergency",
+ [RTE_LOG_ALERT] = "alert",
+ [RTE_LOG_CRIT] = "critical",
+ [RTE_LOG_ERR] = "error",
+ [RTE_LOG_WARNING] = "warning",
+ [RTE_LOG_NOTICE] = "notice",
+ [RTE_LOG_INFO] = "info",
+ [RTE_LOG_DEBUG] = "debug",
+ };
+ size_t len = strlen(level);
unsigned long tmp;
+ char *end;
+ unsigned int i;
- str = strdup(arg);
- if (str == NULL)
+ if (len == 0)
return -1;
- if (strchr(str, ',') == NULL) {
- type = NULL;
- level = str;
- } else {
- type = strsep(&str, ",");
- level = strsep(&str, ",");
+ /* look for named values, skip 0 which is not a valid level */
+ for (i = 1; i < RTE_DIM(levels); i++) {
+ if (strncmp(levels[i], level, len) == 0)
+ return i;
}
+ /* not a string, maybe it is numeric */
errno = 0;
tmp = strtoul(level, &end, 0);
/* check for errors */
- if ((errno != 0) || (level[0] == '\0') ||
- end == NULL || (*end != '\0'))
- goto fail;
+ if (errno != 0 || end == NULL || *end != '\0' ||
+ tmp >= UINT32_MAX)
+ return -1;
- /* log_level is a uint32_t */
- if (tmp >= UINT32_MAX)
- goto fail;
+ return tmp;
+}
+
+static int
+eal_parse_log_level(const char *arg)
+{
+ const char *pattern = NULL;
+ const char *regex = NULL;
+ char *str, *level;
+ int priority;
+
+ str = strdup(arg);
+ if (str == NULL)
+ return -1;
- if (type == NULL) {
- rte_log_set_global_level(tmp);
- } else if (rte_log_set_level_regexp(type, tmp) < 0) {
- printf("cannot set log level %s,%lu\n",
- type, tmp);
+ if ((level = strchr(str, ','))) {
+ regex = str;
+ *level++ = '\0';
+ } else if ((level = strchr(str, ':'))) {
+ pattern = str;
+ *level++ = '\0';
+ } else {
+ level = str;
+ }
+
+ priority = eal_parse_log_priority(level);
+ if (priority < 0) {
+ fprintf(stderr, "invalid log priority: %s\n", level);
goto fail;
}
+ if (regex) {
+ if (rte_log_set_level_regexp(regex, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s,%d\n",
+ regex, priority);

+ goto fail;
+ }
+ if (rte_log_save_regexp(regex, priority) < 0)
+ goto fail;
+ } else if (pattern) {
+ if (rte_log_set_level_pattern(pattern, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s:%d\n",
+ pattern, priority);
+ goto fail;
+ }
+ if (rte_log_save_pattern(pattern, priority) < 0)
+ goto fail;
+ } else {
+ rte_log_set_global_level(priority);
+ }
+
free(str);
return 0;
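The parser above accepts three forms of --log-level: a bare priority (applied globally), a "<regexp>,<priority>" pair and a "<glob-pattern>:<priority>" pair; priority names are matched by prefix, so "debug" and "deb" are equivalent. A hedged sketch of the equivalent API calls (the match expressions are examples only):

#include <rte_log.h>

static void
set_levels_like_cmdline(void)
{
	/* --log-level=info */
	rte_log_set_global_level(RTE_LOG_INFO);
	/* --log-level=pmd\..*,debug */
	rte_log_set_level_regexp("pmd\\..*", RTE_LOG_DEBUG);
	/* --log-level=lib.eal.*:debug */
	rte_log_set_level_pattern("lib.eal.*", RTE_LOG_DEBUG);
}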
@@ -1089,6 +1195,8 @@ eal_parse_common_option(int opt, const char *optarg,
case OPT_NO_HUGE_NUM:
conf->no_hugetlbfs = 1;
+ /* no-huge is legacy mem */
+ conf->legacy_mem = 1;
break;
case OPT_NO_PCI_NUM:
@@ -1107,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_shconf = 1;
break;
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
case OPT_PROC_TYPE_NUM:
conf->process_type = eal_parse_proc_type(optarg);
break;
@@ -1160,6 +1275,12 @@ eal_parse_common_option(int opt, const char *optarg,
core_parsed = LCORE_OPT_MAP;
break;
+ case OPT_LEGACY_MEM_NUM:
+ conf->legacy_mem = 1;
+ break;
+ case OPT_SINGLE_FILE_SEGMENTS_NUM:
+ conf->single_file_segments = 1;
+ break;
/* don't know what to do, leave this to caller */
default:
@@ -1252,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-
- if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_config.force_socket_limits && internal_config.legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with neither --"OPT_IN_MEMORY" nor "
+ "--"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
return 0;
}
@@ -1302,10 +1434,12 @@ eal_common_usage(void)
" --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n"
" --"OPT_SYSLOG" Set syslog facility\n"
" --"OPT_LOG_LEVEL"=<int> Set global log level\n"
- " --"OPT_LOG_LEVEL"=<type-regexp>,<int>\n"
+ " --"OPT_LOG_LEVEL"=<type-match>:<int>\n"
" Set specific log level\n"
" -v Display version information on startup\n"
" -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index caa8774a..9fcb9121 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,18 +13,21 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/file.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
+#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
+#include <rte_tailq.h>
#include "eal_private.h"
#include "eal_filesystem.h"
@@ -51,6 +54,7 @@ enum mp_type {
MP_MSG, /* Share message with peers, will not block */
MP_REQ, /* Request for information, Will block for a reply */
MP_REP, /* Response to previously-received request */
+ MP_IGN, /* Response telling requester to ignore this response */
};
struct mp_msg_internal {
@@ -58,31 +62,66 @@ struct mp_msg_internal {
struct rte_mp_msg msg;
};
-struct sync_request {
- TAILQ_ENTRY(sync_request) next;
- int reply_received;
+struct async_request_param {
+ rte_mp_async_reply_t clb;
+ struct rte_mp_reply user_reply;
+ struct timespec end;
+ int n_responses_processed;
+};
+
+struct pending_request {
+ TAILQ_ENTRY(pending_request) next;
+ enum {
+ REQUEST_TYPE_SYNC,
+ REQUEST_TYPE_ASYNC
+ } type;
char dst[PATH_MAX];
struct rte_mp_msg *request;
struct rte_mp_msg *reply;
- pthread_cond_t cond;
+ int reply_received;
+ RTE_STD_C11
+ union {
+ struct {
+ struct async_request_param *param;
+ } async;
+ struct {
+ pthread_cond_t cond;
+ } sync;
+ };
};
-TAILQ_HEAD(sync_request_list, sync_request);
+TAILQ_HEAD(pending_request_list, pending_request);
static struct {
- struct sync_request_list requests;
+ struct pending_request_list requests;
pthread_mutex_t lock;
-} sync_requests = {
- .requests = TAILQ_HEAD_INITIALIZER(sync_requests.requests),
- .lock = PTHREAD_MUTEX_INITIALIZER
+} pending_requests = {
+ .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ /**< used in async requests only */
};
-static struct sync_request *
-find_sync_request(const char *dst, const char *act_name)
+/* forward declarations */
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
+
+static struct pending_request *
+find_pending_request(const char *dst, const char *act_name)
{
- struct sync_request *r;
+ struct pending_request *r;
- TAILQ_FOREACH(r, &sync_requests.requests, next) {
+ TAILQ_FOREACH(r, &pending_requests.requests, next) {
if (!strcmp(r->dst, dst) &&
!strcmp(r->request->name, act_name))
break;
@@ -91,6 +130,17 @@ find_sync_request(const char *dst, const char *act_name)
return r;
}
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+ const char *prefix = eal_mp_socket_path();
+
+ if (strlen(name) > 0)
+ snprintf(buf, len, "%s_%s", prefix, name);
+ else
+ strlcpy(buf, prefix, len);
+}
+
int
rte_eal_primary_proc_alive(const char *config_file_path)
{
@@ -159,7 +209,7 @@ rte_mp_action_register(const char *name, rte_mp_t action)
rte_errno = ENOMEM;
return -1;
}
- strcpy(entry->action_name, name);
+ strlcpy(entry->action_name, name, sizeof(entry->action_name));
entry->action = action;
pthread_mutex_lock(&mp_mutex_action);
@@ -241,23 +291,35 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
static void
process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
{
- struct sync_request *sync_req;
+ struct pending_request *pending_req;
struct action_entry *entry;
struct rte_mp_msg *msg = &m->msg;
rte_mp_t action = NULL;
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
- if (m->type == MP_REP) {
- pthread_mutex_lock(&sync_requests.lock);
- sync_req = find_sync_request(s->sun_path, msg->name);
- if (sync_req) {
- memcpy(sync_req->reply, msg, sizeof(*msg));
- sync_req->reply_received = 1;
- pthread_cond_signal(&sync_req->cond);
+ if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ pending_req = find_pending_request(s->sun_path, msg->name);
+ if (pending_req) {
+ memcpy(pending_req->reply, msg, sizeof(*msg));
+ /* -1 indicates that we've been asked to ignore */
+ pending_req->reply_received =
+ m->type == MP_REP ? 1 : -1;
+
+ if (pending_req->type == REQUEST_TYPE_SYNC)
+ pthread_cond_signal(&pending_req->sync.cond);
+ else if (pending_req->type == REQUEST_TYPE_ASYNC)
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
- pthread_mutex_unlock(&sync_requests.lock);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
return;
}
@@ -267,10 +329,25 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
action = entry->action;
pthread_mutex_unlock(&mp_mutex_action);
- if (!action)
- RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name);
- else if (action(msg, s->sun_path) < 0)
+ if (!action) {
+ if (m->type == MP_REQ && !internal_config.init_complete) {
+ /* if this is a request, and init is not yet complete,
+ * and callback wasn't registered, we should tell the
+ * requester to ignore our existence because we're not
+ * yet ready to process this request.
+ */
+ struct rte_mp_msg dummy;
+
+ memset(&dummy, 0, sizeof(dummy));
+ strlcpy(dummy.name, msg->name, sizeof(dummy.name));
+ mp_send(&dummy, s->sun_path, MP_IGN);
+ } else {
+ RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+ msg->name);
+ }
+ } else if (action(msg, s->sun_path) < 0) {
RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+ }
}
static void *
@@ -288,10 +365,158 @@ mp_handle(void *arg __rte_unused)
}
static int
+timespec_cmp(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec < b->tv_sec)
+ return -1;
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ return 0;
+}
+
+enum async_action {
+ ACTION_FREE, /**< free the action entry, but don't trigger callback */
+ ACTION_TRIGGER /**< trigger callback, then free action entry */
+};
+
+static enum async_action
+process_async_request(struct pending_request *sr, const struct timespec *now)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ bool timeout, last_msg;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ /* did we timeout? */
+ timeout = timespec_cmp(&param->end, now) <= 0;
+
+ /* if we received a response, adjust relevant data and copy message. */
+ if (sr->reply_received == 1 && sr->reply) {
+ struct rte_mp_msg *msg, *user_msgs, *tmp;
+
+ msg = sr->reply;
+ user_msgs = reply->msgs;
+
+ tmp = realloc(user_msgs, sizeof(*msg) *
+ (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ sr->dst, sr->request->name);
+ /* this entry is going to be removed and its message
+ * dropped, but we don't want to leak memory, so
+ * continue.
+ */
+ } else {
+ user_msgs = tmp;
+ reply->msgs = user_msgs;
+ memcpy(&user_msgs[reply->nb_received],
+ msg, sizeof(*msg));
+ reply->nb_received++;
+ }
+
+ /* mark this request as processed */
+ param->n_responses_processed++;
+ } else if (sr->reply_received == -1) {
+ /* we were asked to ignore this process */
+ reply->nb_sent--;
+ } else if (timeout) {
+ /* count it as processed response, but don't increment
+ * nb_received.
+ */
+ param->n_responses_processed++;
+ }
+
+ free(sr->reply);
+
+ last_msg = param->n_responses_processed == reply->nb_sent;
+
+ return last_msg ? ACTION_TRIGGER : ACTION_FREE;
+}
+
+static void
+trigger_async_action(struct pending_request *sr)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ param->clb(sr->request, reply);
+
+ /* clean up */
+ free(sr->async.param->user_reply.msgs);
+ free(sr->async.param);
+ free(sr->request);
+ free(sr);
+}
+
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg)
+{
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
+ }
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
+
+ action = process_async_request(req, &ts_now);
+
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
+
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
+ }
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
+ }
+
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
+}
+
+static void
+async_reply_handle(void *arg)
+{
+ struct pending_request *req;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+}
+
+static int
open_socket_fd(void)
{
+ char peer_name[PATH_MAX] = {0};
struct sockaddr_un un;
- const char *prefix = eal_mp_socket_path();
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ snprintf(peer_name, sizeof(peer_name),
+ "%d_%"PRIx64, getpid(), rte_rdtsc());
mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
if (mp_fd < 0) {
@@ -301,13 +526,11 @@ open_socket_fd(void)
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
- else {
- snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
- prefix, getpid(), rte_rdtsc());
- }
+
+ create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
unlink(un.sun_path); /* May still exist since last run */
+
if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
un.sun_path, strerror(errno));
@@ -342,54 +565,70 @@ unlink_sockets(const char *filter)
return 0;
}
-static void
-unlink_socket_by_path(const char *path)
-{
- char *filename;
- char *fullpath = strdup(path);
-
- if (!fullpath)
- return;
- filename = basename(fullpath);
- unlink_sockets(filename);
- free(fullpath);
- RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
int
rte_mp_channel_init(void)
{
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
- char *path;
- pthread_t tid;
+ char path[PATH_MAX];
+ int dir_fd;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary processes support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ return 0;
+ }
- snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
- internal_config.hugefile_prefix);
+ /* create filter path */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_filter, basename(path), sizeof(mp_filter));
- path = strdup(eal_mp_socket_path());
- snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
- free(path);
+ /* path may have been modified, so recreate it */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path));
+
+ /* lock the directory */
+ dir_fd = open(mp_dir_path, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+ mp_dir_path, strerror(errno));
+ return -1;
+ }
+
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+ mp_dir_path, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
- unlink_sockets(mp_filter)) {
+ unlink_sockets(mp_filter)) {
RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+ close(dir_fd);
return -1;
}
- if (open_socket_fd() < 0)
+ if (open_socket_fd() < 0) {
+ close(dir_fd);
return -1;
+ }
- if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
+ if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle",
+ NULL, mp_handle, NULL) < 0) {
RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
strerror(errno));
close(mp_fd);
+ close(dir_fd);
mp_fd = -1;
return -1;
}
- /* try best to set thread name */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
- rte_thread_setname(tid, thread_name);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ close(dir_fd);
+
return 0;
}
@@ -416,7 +655,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
- snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
+ strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path));
memset(&msgh, 0, sizeof(msgh));
memset(control, 0, sizeof(control));
@@ -444,13 +683,12 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
if (snd < 0) {
rte_errno = errno;
/* Check if it caused by peer process exits */
- if (errno == -ECONNREFUSED) {
- /* We don't unlink the primary's socket here */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- unlink_socket_by_path(dst_path);
+ if (errno == ECONNREFUSED &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ unlink(dst_path);
return 0;
}
- if (errno == -ENOBUFS) {
+ if (errno == ENOBUFS) {
RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
dst_path);
return 0;
@@ -466,7 +704,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type)
{
- int ret = 0;
+ int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
@@ -488,14 +726,32 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
rte_errno = errno;
return -1;
}
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ closedir(mp_dir);
+ return -1;
+ }
+
while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
- if (send_msg(ent->d_name, msg, type) < 0)
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+ if (send_msg(path, msg, type) < 0)
ret = -1;
}
+ /* unlock the dir */
+ flock(dir_fd, LOCK_UN);
+ /* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
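The directory flock taken above serializes message broadcast against peers starting up or shutting down. A condensed, hypothetical sketch of that locking pattern (not the patch code itself):

#include <dirent.h>
#include <sys/file.h>

static int
for_each_peer_locked(const char *dir_path)
{
	DIR *d = opendir(dir_path);
	int fd;

	if (d == NULL)
		return -1;
	fd = dirfd(d);
	if (flock(fd, LOCK_SH) != 0) {
		closedir(d);
		return -1;
	}
	/* ... readdir() and send to every matching peer socket ... */
	flock(fd, LOCK_UN);
	closedir(d); /* also releases the descriptor returned by dirfd() */
	return 0;
}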
@@ -539,25 +795,82 @@ rte_mp_sendmsg(struct rte_mp_msg *msg)
}
static int
-mp_request_one(const char *dst, struct rte_mp_msg *req,
+mp_request_async(const char *dst, struct rte_mp_msg *req,
+ struct async_request_param *param, const struct timespec *ts)
+{
+ struct rte_mp_msg *reply_msg;
+ struct pending_request *pending_req, *exist;
+ int ret;
+
+ pending_req = calloc(1, sizeof(*pending_req));
+ reply_msg = calloc(1, sizeof(*reply_msg));
+ if (pending_req == NULL || reply_msg == NULL) {
+ RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto fail;
+ }
+
+ pending_req->type = REQUEST_TYPE_ASYNC;
+ strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
+ pending_req->request = req;
+ pending_req->reply = reply_msg;
+ pending_req->async.param = param;
+
+ /* queue already locked by caller */
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto fail;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ } else if (ret == 0) {
+ ret = 0;
+ goto fail;
+ }
+ TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next);
+
+ param->user_reply.nb_sent++;
+
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ rte_panic("Fix the above shit to properly free all memory\n");
+ }
+
+ return 0;
+fail:
+ free(pending_req);
+ free(reply_msg);
+ return ret;
+}
+
+static int
+mp_request_sync(const char *dst, struct rte_mp_msg *req,
struct rte_mp_reply *reply, const struct timespec *ts)
{
int ret;
- struct timeval now;
struct rte_mp_msg msg, *tmp;
- struct sync_request sync_req, *exist;
-
- sync_req.reply_received = 0;
- strcpy(sync_req.dst, dst);
- sync_req.request = req;
- sync_req.reply = &msg;
- pthread_cond_init(&sync_req.cond, NULL);
-
- pthread_mutex_lock(&sync_requests.lock);
- exist = find_sync_request(dst, req->name);
- if (!exist)
- TAILQ_INSERT_TAIL(&sync_requests.requests, &sync_req, next);
- pthread_mutex_unlock(&sync_requests.lock);
+ struct pending_request pending_req, *exist;
+
+ pending_req.type = REQUEST_TYPE_SYNC;
+ pending_req.reply_received = 0;
+ strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
+ pending_req.request = req;
+ pending_req.reply = &msg;
+ pthread_cond_init(&pending_req.sync.cond, NULL);
+
+ exist = find_pending_request(dst, req->name);
if (exist) {
RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
rte_errno = EEXIST;
@@ -572,33 +885,31 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
} else if (ret == 0)
return 0;
+ TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next);
+
reply->nb_sent++;
- pthread_mutex_lock(&sync_requests.lock);
do {
- pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts);
- /* Check spurious wakeups */
- if (sync_req.reply_received == 1)
- break;
- /* Check if time is out */
- if (gettimeofday(&now, NULL) < 0)
- break;
- if (now.tv_sec < ts->tv_sec)
- break;
- else if (now.tv_sec == ts->tv_sec &&
- now.tv_usec * 1000 < ts->tv_nsec)
- break;
- } while (1);
- /* We got the lock now */
- TAILQ_REMOVE(&sync_requests.requests, &sync_req, next);
- pthread_mutex_unlock(&sync_requests.lock);
+ ret = pthread_cond_timedwait(&pending_req.sync.cond,
+ &pending_requests.lock, ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ TAILQ_REMOVE(&pending_requests.requests, &pending_req, next);
- if (sync_req.reply_received == 0) {
+ if (pending_req.reply_received == 0) {
RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n",
dst, req->name);
rte_errno = ETIMEDOUT;
return -1;
}
+ if (pending_req.reply_received == -1) {
+ RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+ /* not receiving this message is not an error, so decrement
+ * number of sent messages
+ */
+ reply->nb_sent--;
+ return 0;
+ }
tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
if (!tmp) {
@@ -614,10 +925,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
}
int __rte_experimental
-rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
const struct timespec *ts)
{
- int ret = 0;
+ int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
struct timeval now;
@@ -627,6 +938,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -642,8 +959,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
reply->msgs = NULL;
/* for secondary process, send request to the primary process only */
- if (rte_eal_process_type() == RTE_PROC_SECONDARY)
- return mp_request_one(eal_mp_socket_path(), req, reply, &end);
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ pthread_mutex_lock(&pending_requests.lock);
+ ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end);
+ pthread_mutex_unlock(&pending_requests.lock);
+ return ret;
+ }
/* for primary process, broadcast request, and collect reply 1 by 1 */
mp_dir = opendir(mp_dir_path);
@@ -653,22 +974,193 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
return -1;
}
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ closedir(mp_dir);
+ rte_errno = errno;
+ return -1;
+ }
+
+ pthread_mutex_lock(&pending_requests.lock);
while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
- if (mp_request_one(ent->d_name, req, reply, &end))
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ /* unlocks the mutex while waiting for response,
+ * locks on receive
+ */
+ if (mp_request_sync(path, req, reply, &end))
ret = -1;
}
+ pthread_mutex_unlock(&pending_requests.lock);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ /* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
int __rte_experimental
-rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb)
{
+ struct rte_mp_msg *copy;
+ struct pending_request *dummy;
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec *end;
+ bool dummy_used = false;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ if (check_input(req) == false)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Faile to get current time\n");
+ rte_errno = errno;
+ return -1;
+ }
+ copy = calloc(1, sizeof(*copy));
+ dummy = calloc(1, sizeof(*dummy));
+ param = calloc(1, sizeof(*param));
+ if (copy == NULL || dummy == NULL || param == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* copy message */
+ memcpy(copy, req, sizeof(*copy));
+
+ param->n_responses_processed = 0;
+ param->clb = clb;
+ end = &param->end;
+ reply = &param->user_reply;
+
+ end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end->tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ /* we have to lock the request queue here, as we will be adding a bunch
+ * of requests to the queue at once, and some of the replies may arrive
+ * before we add all of the requests to the queue.
+ */
+ pthread_mutex_lock(&pending_requests.lock);
+
+ /* we have to ensure that callback gets triggered even if we don't send
+ * anything, therefore earlier we have allocated a dummy request. fill
+ * it, and put it on the queue if we don't send any requests.
+ */
+ dummy->type = REQUEST_TYPE_ASYNC;
+ dummy->request = copy;
+ dummy->reply = NULL;
+ dummy->async.param = param;
+ dummy->reply_received = 1; /* short-circuit the timeout */
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
+
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
+ next);
+ dummy_used = true;
+ }
+
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* if we couldn't send anything, clean up */
+ if (ret != 0)
+ goto fail;
+ return 0;
+ }
+
+ /* for primary process, broadcast request */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto unlock_fail;
+ }
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto closedir_fail;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ if (mp_request_async(path, copy, param, ts))
+ ret = -1;
+ }
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
+ dummy_used = true;
+ }
+
+ /* finally, unlock the queue */
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+ /* if dummy was unused, free it */
+ if (!dummy_used)
+ free(dummy);
+
+ return ret;
+closedir_fail:
+ closedir(mp_dir);
+unlock_fail:
+ pthread_mutex_unlock(&pending_requests.lock);
+fail:
+ free(dummy);
+ free(param);
+ free(copy);
+ return -1;
+}
+
+int __rte_experimental
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+{
RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name);
if (check_input(msg) == false)
@@ -680,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return -1;
}
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
return mp_send(msg, peer, MP_REP);
}
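For callers, the reworked IPC surface now offers both a blocking and a callback-based request. A hedged usage sketch; the action name is illustrative and would have to be registered on the peer with rte_mp_action_register():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>
#include <rte_log.h>

static int
on_reply(const struct rte_mp_msg *request, const struct rte_mp_reply *reply)
{
	RTE_LOG(INFO, USER1, "%s: %d of %d replies received\n",
		request->name, reply->nb_received, reply->nb_sent);
	return 0;
}

static int
send_example_request(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

	memset(&req, 0, sizeof(req));
	snprintf(req.name, sizeof(req.name), "example_action");

	/* blocking variant: caller owns (and must free) reply.msgs */
	if (rte_mp_request_sync(&req, &reply, &ts) == 0)
		free(reply.msgs);

	/* asynchronous variant: on_reply() fires once all peers answered
	 * or the timeout expired */
	return rte_mp_request_async(&req, &ts, on_reply);
}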
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 40902e49..48ef4d6d 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -7,6 +7,7 @@
#include <stdint.h>
#include <unistd.h>
#include <pthread.h>
+#include <signal.h>
#include <sched.h>
#include <assert.h>
#include <string.h>
@@ -15,6 +16,7 @@
#include <rte_memory.h>
#include <rte_log.h>
+#include "eal_private.h"
#include "eal_thread.h"
RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
@@ -32,10 +34,7 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
if (lcore_id >= RTE_MAX_LCORE)
return -EINVAL;
- if (cfg->lcore_role[lcore_id] == role)
- return 0;
-
- return -EINVAL;
+ return cfg->lcore_role[lcore_id] == role;
}
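With the change above, rte_lcore_has_role() now answers as a boolean (1/0, or -EINVAL for an invalid lcore id) instead of 0/-EINVAL, so callers are expected to test it for truth. A hypothetical caller:

#include <rte_eal.h>
#include <rte_lcore.h>

static int
is_service_lcore(unsigned int lcore_id)
{
	/* a return value of 1 now means "lcore has this role" */
	return rte_lcore_has_role(lcore_id, ROLE_SERVICE) == 1;
}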
int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
@@ -140,3 +139,94 @@ exit:
return ret;
}
+
+
+struct rte_thread_ctrl_params {
+ void *(*start_routine)(void *);
+ void *arg;
+ pthread_barrier_t configured;
+};
+
+static void *rte_thread_init(void *arg)
+{
+ int ret;
+ struct rte_thread_ctrl_params *params = arg;
+ void *(*start_routine)(void *) = params->start_routine;
+ void *routine_arg = params->arg;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return start_routine(routine_arg);
+}
+
+__rte_experimental int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ struct rte_thread_ctrl_params *params;
+ unsigned int lcore_id;
+ rte_cpuset_t cpuset;
+ int cpu_found, ret;
+
+ params = malloc(sizeof(*params));
+ if (!params)
+ return -ENOMEM;
+
+ params->start_routine = start_routine;
+ params->arg = arg;
+
+ pthread_barrier_init(&params->configured, NULL, 2);
+
+ ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+ if (ret != 0) {
+ free(params);
+ return -ret;
+ }
+
+ if (name != NULL) {
+ ret = rte_thread_setname(*thread, name);
+ if (ret < 0)
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
+ }
+
+ cpu_found = 0;
+ CPU_ZERO(&cpuset);
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (eal_cpu_detected(lcore_id) &&
+ rte_lcore_has_role(lcore_id, ROLE_OFF)) {
+ CPU_SET(lcore_id, &cpuset);
+ cpu_found = 1;
+ }
+ }
+ /* if no detected cpu is off, use master core */
+ if (!cpu_found)
+ CPU_SET(rte_get_master_lcore(), &cpuset);
+
+ ret = pthread_setaffinity_np(*thread, sizeof(cpuset), &cpuset);
+ if (ret < 0)
+ goto fail;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return 0;
+
+fail:
+ if (PTHREAD_BARRIER_SERIAL_THREAD ==
+ pthread_barrier_wait(&params->configured)) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+ pthread_cancel(*thread);
+ pthread_join(*thread, NULL);
+ return -ret;
+}
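A minimal, hypothetical user of the new helper — thread name and routine are made up; the helper pins the thread to cores not used by DPDK lcores, falling back to the master core:

#include <pthread.h>
#include <rte_lcore.h>

static void *
housekeeping(void *arg)
{
	(void)arg;
	/* ... periodic work that must stay off the data-plane lcores ... */
	return NULL;
}

static int
start_housekeeping(void)
{
	pthread_t tid;

	return rte_ctrl_thread_create(&tid, "example-ctrl", NULL,
			housekeeping, NULL);
}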
diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 00000000..1b93c5b3
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
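A round-trip sketch for the new UUID helpers; the UUID value is arbitrary and RTE_UUID_STRLEN is assumed to come from the accompanying rte_uuid.h:

#include <rte_uuid.h>

static int
uuid_roundtrip(void)
{
	rte_uuid_t uu;
	char out[RTE_UUID_STRLEN];

	if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", uu) != 0)
		return -1;
	rte_uuid_unparse(uu, out, sizeof(out));
	return 0;
}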
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 4708dd54..de05febf 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -12,7 +12,6 @@
#define EAL_FILESYSTEM_H
/** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
#include <stdint.h>
#include <limits.h>
@@ -22,60 +21,70 @@
#include <rte_string_fns.h>
#include "eal_internal_cfg.h"
-static const char *default_config_dir = "/var/run";
+/* sets up platform-specific runtime data dir */
+int
+eal_create_runtime_dir(void);
+/* returns runtime dir */
+const char *
+eal_get_runtime_dir(void);
+
+#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
return buffer;
}
/** Path of primary/secondary communication unix socket file. */
-#define MP_SOCKET_PATH_FMT "%s/.%s_unix"
+#define MP_SOCKET_FNAME "mp_socket"
static inline const char *
eal_mp_socket_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, MP_SOCKET_PATH_FMT,
- directory, internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ MP_SOCKET_FNAME);
+ return buffer;
+}
+#define FBARRAY_NAME_FMT "%s/fbarray_%s"
+static inline const char *
+eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
+ snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name);
return buffer;
}
/** Path of hugepage info file. */
-#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
-
+#define HUGEPAGE_INFO_FNAME "hugepage_info"
static inline const char *
eal_hugepage_info_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ HUGEPAGE_INFO_FNAME);
+ return buffer;
+}
+
+/** Path of hugepage data file. */
+#define HUGEPAGE_DATA_FNAME "hugepage_data"
+static inline const char *
+eal_hugepage_data_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ HUGEPAGE_DATA_FNAME);
return buffer;
}
/** String format for hugepage map files. */
#define HUGEFILE_FMT "%s/%smap_%d"
-#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
-
static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
{
@@ -85,6 +94,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
return buffer;
}
+/** String format for hugepage map lock files. */
+#define HUGEFILE_LOCK_FMT "%s/map_%d.lock"
+static inline const char *
+eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id)
+{
+ snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(),
+ f_id);
+ buffer[buflen - 1] = '\0';
+ return buffer;
+}
+
/** define the default filename prefix for the %s values above */
#define HUGEFILE_PREFIX_DEFAULT "rte"
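All of the path helpers above now funnel through a single per-prefix runtime directory (on Linux typically something like /var/run/dpdk/<prefix>/ for root users, created by eal_create_runtime_dir(), which lives outside this hunk). An EAL-internal sketch of how a derived path is composed, assuming eal_filesystem.h; the helper name is hypothetical:

#include <stdio.h>

#include "eal_filesystem.h"

static const char *
example_runtime_file(char *buf, size_t len, const char *fname)
{
	snprintf(buf, len, "%s/%s", eal_get_runtime_dir(), fname);
	return buf;
}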
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index 1d519bbb..4582f19c 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -22,14 +22,19 @@ struct hugepage_file {
size_t size; /**< the page size */
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
- int memseg_id; /**< the memory segment to which page belongs */
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
};
/**
- * Read the information from linux on what hugepages are available
- * for the EAL to use
+ * Read the information on what hugepages are available for the EAL to use,
+ * clearing out any unused ones.
*/
int eal_hugepage_info_init(void);
+/**
+ * Read whatever information primary process has shared about hugepages into
+ * secondary process.
+ */
+int eal_hugepage_info_read(void);
+
#endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc3..00ee6e06 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -21,9 +21,9 @@
*/
struct hugepage_info {
uint64_t hugepage_sz; /**< size of a huge page */
- const char *hugedir; /**< dir where hugetlbfs is mounted */
+ char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */
uint32_t num_pages[RTE_MAX_NUMA_NODES];
- /**< number of hugepages of that size on each socket */
+ /**< number of hugepages of that size on each socket */
int lock_descriptor; /**< file descriptor for hugepage dir */
};
@@ -41,12 +41,26 @@ struct internal_config {
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
/** true to try allocating memory on specific sockets */
volatile unsigned force_sockets;
volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
+ volatile unsigned legacy_mem;
+ /**< true to enable legacy memory behavior (no dynamic allocation,
+ * IOVA-contiguous segments).
+ */
+ volatile unsigned single_file_segments;
+ /**< true if storing all pages within single files (per-page-size,
+ * per-node) non-legacy mode only.
+ */
volatile int syslog_facility; /**< facility passed to openlog() */
/** default interrupt mode for VFIO */
volatile enum rte_intr_mode vfio_intr_mode;
@@ -56,6 +70,8 @@ struct internal_config {
/**< user defined mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+ volatile unsigned int init_complete;
+ /**< indicates whether EAL has completed initialization */
};
extern struct internal_config internal_config; /**< Global EAL configuration. */
diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
new file mode 100644
index 00000000..36bb1a02
--- /dev/null
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef EAL_MEMALLOC_H
+#define EAL_MEMALLOC_H
+
+#include <stdbool.h>
+
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+
+/*
+ * Allocate segment of specified page size.
+ */
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket);
+
+/*
+ * Allocate `n_segs` segments.
+ *
+ * Note: `ms` can be NULL.
+ *
+ * Note: it is possible to request best-effort allocation by setting `exact` to
+ * `false`, in which case allocator will return however many pages it managed to
+ * allocate successfully.
+ */
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact);
+
+/*
+ * Deallocate segment
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms);
+
+/*
+ * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all
+ * segments, -1 on error. Any segments that can be deallocated will be
+ * deallocated, even in case of error.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs);
+
+/*
+ * Check if the memory of length `len` pointed to by `start`, which resides in
+ * memseg list `msl`, is IOVA-contiguous.
+ */
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len);
+
+/* synchronize local memory map to primary process */
+int
+eal_memalloc_sync_with_primary(void);
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg);
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg);
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len);
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
+
+int
+eal_memalloc_init(void);
+
+#endif /* EAL_MEMALLOC_H */
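
For illustration only (editorial note, not part of the patch): a minimal sketch of how an internal EAL caller might use the best-effort mode described above. It assumes the return value of eal_memalloc_alloc_seg_bulk() is the number of segments actually obtained, with a negative value on outright failure; that reading is inferred from the comment rather than stated by this header.

#include <stdbool.h>
#include <rte_memory.h>
#include "eal_memalloc.h"

/* Best-effort reservation of up to 'want' 2 MB segments on 'socket'. */
static int
reserve_some_2m_segs(struct rte_memseg **ms, int want, int socket)
{
	int got = eal_memalloc_alloc_seg_bulk(ms, want, RTE_PGSIZE_2M,
			socket, false);

	if (got < 0)
		return -1; /* allocation failed outright */
	/* 'got' may be smaller than 'want'; the caller must handle that. */
	return got;
}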
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e86c7114..96e16678 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -45,8 +45,12 @@ enum {
OPT_NO_PCI_NUM,
#define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
#define OPT_SOCKET_MEM "socket-mem"
OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
#define OPT_SYSLOG "syslog"
OPT_SYSLOG_NUM,
#define OPT_VDEV "vdev"
@@ -55,6 +59,10 @@ enum {
OPT_VFIO_INTR_NUM,
#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_LEGACY_MEM "legacy-mem"
+ OPT_LEGACY_MEM_NUM,
+#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments"
+ OPT_SINGLE_FILE_SEGMENTS_NUM,
OPT_LONG_MAX_NUM
};
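
For context (editorial note, not part of the patch): these option IDs back new long options on the EAL command line. A hypothetical way to pass the new --in-memory flag from application code is sketched below; the application name, core list and argv construction are illustrative, and combining it with --socket-limit, --legacy-mem or --single-file-segments is subject to EAL option validation.

#include <rte_common.h>
#include <rte_eal.h>

int
main(void)
{
	/* Hypothetical argv: run entirely in memory, no shared files. */
	char *argv[] = { "myapp", "-l", "0-1", "--in-memory" };

	if (rte_eal_init(RTE_DIM(argv), argv) < 0)
		return -1;
	/* ... application code ... */
	return 0;
}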
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0b287700..4f809a83 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -9,6 +9,8 @@
#include <stdint.h>
#include <stdio.h>
+#include <rte_dev.h>
+
/**
* Initialize the memzone subsystem (private to eal).
*
@@ -45,6 +47,18 @@ void eal_log_set_default(FILE *default_log);
int rte_eal_cpu_init(void);
/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
* Map memory
*
* This function is private to EAL.
@@ -81,6 +95,12 @@ int rte_eal_timer_init(void);
int rte_eal_log_init(const char *id, int facility);
/**
+ * Save the log regexp or pattern so it can be applied to log types
+ * registered later
+ */
+int rte_log_save_regexp(const char *type, int priority);
+int rte_log_save_pattern(const char *pattern, int priority);
+
+/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
*
@@ -127,6 +147,39 @@ int rte_eal_alarm_init(void);
int rte_eal_check_module(const char *module_name);
/**
+ * Get virtual area of specified size from the OS.
+ *
+ * This function is private to the EAL.
+ *
+ * @param requested_addr
+ * Address where to request address space.
+ * @param size
+ * Size of requested area.
+ * @param page_sz
+ * Page size on which to align requested virtual area.
+ * @param flags
+ * EAL_VIRTUAL_AREA_* flags.
+ * @param mmap_flags
+ * Extra flags passed directly to mmap().
+ *
+ * @return
+ * Virtual area address if successful.
+ * NULL if unsuccessful.
+ */
+
+#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
+/**< don't fail if cannot get exact requested address. */
+#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
+/**< try getting smaller sized (decrement by page size) virtual areas if cannot
+ * get area of requested size.
+ */
+#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
+/**< immediately unmap reserved virtual area. */
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags);
+
+/**
* Get cpu core_id.
*
* This function is private to the EAL.
@@ -205,4 +258,50 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
int rte_mp_channel_init(void);
+/**
+ * @internal
+ * Execute all the user application registered callbacks for the specific
+ * device. It is for DPDK internal use only; user applications should not
+ * call it directly.
+ *
+ * @param device_name
+ * The device name.
+ * @param event
+ * the device event type.
+ */
+void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layer for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a working copy of devstr that will be
+ * used by layer descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
#endif /* _EAL_PRIVATE_H_ */
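
A brief editorial sketch (not part of the patch) of how the reservation helper and its flags might be used; the hint address, the 1 GB size and the 2 MB alignment are made-up values rather than anything taken from an actual EAL caller.

#include <stdint.h>
#include <stddef.h>
#include <rte_memory.h>
#include "eal_private.h"

static void *
reserve_va_space(void)
{
	void *hint = (void *)(uintptr_t)0x100000000ULL; /* illustrative address */
	size_t size = (size_t)1 << 30;                  /* 1 GB of address space */

	/* ADDR_IS_HINT: an address other than 'hint' is acceptable. */
	return eal_get_virtual_area(hint, &size, RTE_PGSIZE_2M,
			EAL_VIRTUAL_AREA_ADDR_IS_HINT, 0);
}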
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
index f3f3b6e3..40e14e56 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
@@ -1,33 +1,5 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_ATOMIC_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
index d2b7fa20..859562e5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
@@ -1,33 +1,5 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_ATOMIC_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
index 8af0a39a..9ec4a975 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_BYTEORDER_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
index b8f62889..022e7da5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CPUFLAGS_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
index eb02d9b9..b5347be1 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CPUFLAGS_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
index a8009a06..e8ffa894 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CYCLES_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
index 9c1be71e..c4f974fe 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CYCLES_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
index 1d562c3f..47dea9a8 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_MEMCPY_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index e4dafda1..eb02c3b4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_MEMCPY_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
index aa37de57..27870c2a 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_PREFETCH_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index 43cde172..e53420a0 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_PREFETCH_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
index 664bec88..18bb37b0 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
/* copied from ppc_64 */
#ifndef _RTE_RWLOCK_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
index 396a42e8..1a6916b6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_SPINLOCK_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index 39fce7b9..ce38350b 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -55,7 +55,7 @@ extern "C" {
* Guarantees that the LOAD and STORE operations generated before the
* barrier occur before the LOAD and STORE operations generated after.
*/
-#define rte_mb() {asm volatile("sync" : : : "memory"); }
+#define rte_mb() asm volatile("sync" : : : "memory")
/**
* Write memory barrier.
@@ -136,6 +136,12 @@ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
}
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+}
+
/*------------------------- 32 bit atomic operations -------------------------*/
static inline int
@@ -237,6 +243,13 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
return ret == 0;
}
+
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+}
+
/*------------------------- 64 bit atomic operations -------------------------*/
static inline int
@@ -431,7 +444,6 @@ static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
{
return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
}
-
/**
* Atomically set a 64-bit counter to 0.
*
@@ -442,6 +454,13 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
{
v->cnt = 0;
}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+	return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+}
+
#endif
#ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
index de8af19e..9fadc040 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
#ifndef _RTE_RWLOCK_PPC_64_H_
#define _RTE_RWLOCK_PPC_64_H_
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index 5cfd3832..148398f5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -104,6 +104,18 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
return res;
}
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgw %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
{
return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
@@ -178,6 +190,18 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
return res;
}
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgl %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
{
return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index fb3abf18..a932f354 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -98,6 +98,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
return res;
}
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dest, uint64_t val)
+{
+ uint64_t old;
+
+ do {
+ old = *dest;
+ } while (rte_atomic64_cmpset(dest, old, val) == 0);
+
+ return old;
+}
+
static inline void
rte_atomic64_init(rte_atomic64_t *v)
{
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 1a53a766..fd2ec9c5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -71,6 +71,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
return res;
}
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgq %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline void
rte_atomic64_init(rte_atomic64_t *v)
{
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index cc140ecc..7b758094 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -52,7 +52,7 @@ rte_memcpy(void *dst, const void *src, size_t n);
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -65,7 +65,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
@@ -78,7 +78,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
__m512i zmm0;
@@ -91,7 +91,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
* Copy 128 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -102,7 +102,7 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
* Copy 256 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -293,7 +293,7 @@ COPY_BLOCK_128_BACK63:
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -306,7 +306,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
@@ -319,7 +319,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
@@ -486,7 +486,7 @@ COPY_BLOCK_128_BACK31:
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -499,7 +499,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -510,7 +510,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -574,7 +574,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
*/
#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \
__extension__ ({ \
- int tmp; \
+ size_t tmp; \
while (len >= 128 + 16 - offset) { \
xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
len -= 128; \
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
index 4b16887e..60321da0 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -76,10 +76,12 @@ static inline int rte_tm_supported(void)
static inline int
rte_try_tm(volatile int *lock)
{
+ int retries;
+
if (!rte_rtm_supported)
return 0;
- int retries = RTE_RTM_MAX_RETRIES;
+ retries = RTE_RTM_MAX_RETRIES;
while (likely(retries--)) {
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index 50e1b8a4..b99ba468 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -191,6 +191,36 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ *   ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
* The atomic counter structure.
*/
typedef struct {
@@ -444,6 +474,36 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ *   ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
* The atomic counter structure.
*/
typedef struct {
@@ -696,6 +756,36 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ *   ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
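
As an editorial usage illustration of the exchange operation documented above (not part of the patch): a common pattern is to atomically grab-and-reset a 64-bit statistics counter. The counter below is hypothetical.

#include <stdint.h>
#include <rte_atomic.h>

static volatile uint64_t tx_bytes; /* hypothetical counter */

/* Return the accumulated byte count and reset it in one atomic step. */
static uint64_t
collect_and_reset_tx_bytes(void)
{
	return rte_atomic64_exchange(&tx_bytes, 0);
}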
+
+/**
* The atomic counter structure.
*/
typedef struct {
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
index 9bed85cc..7d9a1463 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -123,7 +123,7 @@ typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */
static inline uint16_t
rte_constant_bswap16(uint16_t x)
{
- return RTE_STATIC_BSWAP16(x);
+ return (uint16_t)RTE_STATIC_BSWAP16(x);
}
/*
@@ -135,7 +135,7 @@ rte_constant_bswap16(uint16_t x)
static inline uint32_t
rte_constant_bswap32(uint32_t x)
{
- return RTE_STATIC_BSWAP32(x);
+ return (uint32_t)RTE_STATIC_BSWAP32(x);
}
/*
@@ -147,7 +147,7 @@ rte_constant_bswap32(uint32_t x)
static inline uint64_t
rte_constant_bswap64(uint64_t x)
{
- return RTE_STATIC_BSWAP64(x);
+ return (uint64_t)RTE_STATIC_BSWAP64(x);
}
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
index 8d31687d..156ea002 100644
--- a/lib/librte_eal/common/include/generic/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -64,4 +64,25 @@ rte_cpu_check_supported(void);
int
rte_cpu_is_supported(void);
+/**
+ * This function attempts to retrieve a value from the auxiliary vector.
+ * If it is unsuccessful, the result will be 0, and errno will be set.
+ *
+ * @return A value from the auxiliary vector. When the value is 0, check
+ * errno to determine if an error occurred.
+ */
+unsigned long
+rte_cpu_getauxval(unsigned long type);
+
+/**
+ * This function retrieves a value from the auxiliary vector and compares it,
+ * as a string, against the string provided as parameter.
+ *
+ * @return The result of calling strcmp() against the value retrieved from
+ * the auxiliary vector. When the value is 0 (meaning a match is found),
+ * check errno to determine if an error occurred.
+ */
+int
+rte_cpu_strcmp_auxval(unsigned long type, const char *str);
+
#endif /* _RTE_CPUFLAGS_H_ */
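
An editorial sketch (not part of the patch) of the new auxiliary-vector helpers in use; AT_HWCAP and AT_PLATFORM come from the C library's <sys/auxv.h>, and the "power9" platform string is purely illustrative.

#include <errno.h>
#include <sys/auxv.h>
#include <rte_cpuflags.h>

static void
probe_platform(void)
{
	unsigned long hwcap = rte_cpu_getauxval(AT_HWCAP);

	if (hwcap == 0 && errno != 0)
		return; /* auxiliary vector could not be read */

	if (rte_cpu_strcmp_auxval(AT_PLATFORM, "power9") == 0) {
		/* platform string matched */
	}
}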
diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h
index 899e9bc4..5751a0e6 100644
--- a/lib/librte_eal/common/include/generic/rte_rwlock.h
+++ b/lib/librte_eal/common/include/generic/rte_rwlock.h
@@ -71,7 +71,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl)
continue;
}
success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
- x, x + 1);
+ (uint32_t)x, (uint32_t)(x + 1));
}
}
@@ -107,7 +107,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl)
continue;
}
success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
- 0, -1);
+ 0, (uint32_t)-1);
}
}
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 7d4935fc..d9facc64 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index 6fb08341..b7b5b084 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -211,6 +211,7 @@ struct rte_bus {
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
};
/**
@@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void);
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, 110); \
-static void businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h
new file mode 100644
index 00000000..276c91e9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic coprocessor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero, this function will stop iterating
+ * over any more classes. To continue a search, the class of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
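
An editorial sketch (not part of the patch) of registering a device class with these helpers, mirroring the existing RTE_REGISTER_BUS pattern; the class name "demo" and the empty iterator are hypothetical.

#include <rte_class.h>

/* Hypothetical iterator: this class enumerates no devices. */
static void *
demo_dev_iterate(const void *start, const char *devstr,
		const struct rte_dev_iterator *it)
{
	(void)start;
	(void)devstr;
	(void)it;
	return NULL;
}

static struct rte_class rte_class_demo = {
	.dev_iterate = demo_dev_iterate,
};

RTE_REGISTER_CLASS(demo, rte_class_demo);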
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index c7803e41..069c13ec 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -81,6 +81,26 @@ typedef uint16_t unaligned_uint16_t;
*/
#define RTE_SET_USED(x) (void)(x)
+#define RTE_PRIORITY_LOG 101
+#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
+#define RTE_PRIORITY_LAST 65535
+
+#define RTE_PRIO(prio) \
+ RTE_PRIORITY_ ## prio
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
+
/**
* Run function before main() with low priority.
*
@@ -90,19 +110,30 @@ typedef uint16_t unaligned_uint16_t;
* Constructor function.
*/
#define RTE_INIT(func) \
-static void __attribute__((constructor, used)) func(void)
+ RTE_INIT_PRIO(func, LAST)
/**
- * Run function before main() with high priority.
+ * Run after main() with low priority.
*
* @param func
- * Constructor function.
+ * Destructor function name.
* @param prio
* Priority number must be above 100.
- * Lowest number is the first to run.
+ * Lowest number is the last to run.
*/
-#define RTE_INIT_PRIO(func, prio) \
-static void __attribute__((constructor(prio), used)) func(void)
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
/**
* Force a function to be inlined
@@ -117,7 +148,7 @@ static void __attribute__((constructor(prio), used)) func(void)
/*********** Macros for pointer arithmetic ********/
/**
- * add a byte-value offset from a pointer
+ * add a byte-value offset to a pointer
*/
#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
@@ -191,6 +222,22 @@ static void __attribute__((constructor(prio), used)) func(void)
#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
/**
+ * Macro to align a value to a multiple of the given value. The resultant
+ * value will be of the same type as the first parameter and will be no lower
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_CEIL(v, mul) \
+ (((v + (typeof(v))(mul) - 1) / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align a value to a multiple of the given value. The resultant
+ * value will be of the same type as the first parameter and will be no higher
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_FLOOR(v, mul) \
+ ((v / ((typeof(v))(mul))) * (typeof(v))(mul))
+
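
A worked editorial example of the two macros above (not part of the patch), with made-up numbers: aligning 53 to a multiple of 16 gives 64 with the CEIL variant and 48 with the FLOOR variant.

#include <stdint.h>
#include <rte_common.h>

static void
align_mul_demo(void)
{
	uint32_t v = 53;
	uint32_t up = RTE_ALIGN_MUL_CEIL(v, 16);    /* 64 */
	uint32_t down = RTE_ALIGN_MUL_FLOOR(v, 16); /* 48 */

	(void)up;
	(void)down;
}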
+/**
* Checks if a pointer is aligned to a given power-of-two value
*
* @param ptr
@@ -223,9 +270,59 @@ extern int RTE_BUILD_BUG_ON_detected_error;
} while(0)
#endif
+/**
+ * Propagates the most significant set bit of the 32b input into all less
+ * significant bits, constructing a value with the same MSB as x
+ * but with all 1's under it.
+ *
+ * @param x
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint32_t
+rte_combine32ms1b(register uint32_t x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+
+ return x;
+}
+
+/**
+ * Propagates the most significant set bit of the 64b input into all less
+ * significant bits, constructing a value with the same MSB as v
+ * but with all 1's under it.
+ *
+ * @param v
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint64_t
+rte_combine64ms1b(register uint64_t v)
+{
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+
+ return v;
+}
+
/*********** Macros to work with powers of 2 ********/
/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
* Returns true if n is a power of 2
* @param n
* Number to check
@@ -250,16 +347,29 @@ static inline uint32_t
rte_align32pow2(uint32_t x)
{
x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
+ x = rte_combine32ms1b(x);
return x + 1;
}
/**
+ * Aligns input parameter to the previous power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint32_t
+rte_align32prevpow2(uint32_t x)
+{
+ x = rte_combine32ms1b(x);
+
+ return x - (x >> 1);
+}
+
+/**
* Aligns 64b input parameter to the next power of 2
*
* @param v
@@ -272,16 +382,28 @@ static inline uint64_t
rte_align64pow2(uint64_t v)
{
v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v |= v >> 32;
+ v = rte_combine64ms1b(v);
return v + 1;
}
+/**
+ * Aligns 64b input parameter to the previous power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint64_t
+rte_align64prevpow2(uint64_t v)
+{
+ v = rte_combine64ms1b(v);
+
+ return v - (v >> 1);
+}
+
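
A short worked editorial example of the helpers above (not part of the patch): for x = 100 (0x64), rte_combine32ms1b() yields 0x7f (all bits below the MSB set), rte_align32pow2() yields 128, and rte_align32prevpow2() yields 64.

#include <stdint.h>
#include <rte_common.h>

static void
pow2_demo(void)
{
	uint32_t x = 100;                       /* 0x64 */
	uint32_t mask = rte_combine32ms1b(x);   /* 0x7f */
	uint32_t up = rte_align32pow2(x);       /* 128 */
	uint32_t down = rte_align32prevpow2(x); /* 64 */

	(void)mask;
	(void)up;
	(void)down;
}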
/*********** Macros for calculating min and max **********/
/**
@@ -320,7 +442,7 @@ rte_align64pow2(uint64_t v)
static inline uint32_t
rte_bsf32(uint32_t v)
{
- return __builtin_ctz(v);
+ return (uint32_t)__builtin_ctz(v);
}
/**
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index b688f1ef..b80a8059 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -24,6 +24,25 @@ extern "C" {
#include <rte_compat.h>
#include <rte_log.h>
+/**
+ * The device event type.
+ */
+enum rte_dev_event_type {
+ RTE_DEV_EVENT_ADD, /**< device being added */
+ RTE_DEV_EVENT_REMOVE, /**< device being removed */
+ RTE_DEV_EVENT_MAX /**< max value of this enum */
+};
+
+struct rte_dev_event {
+ enum rte_dev_event_type type; /**< device event type */
+ int subsystem; /**< subsystem id */
+ char *devname; /**< device name */
+};
+
+typedef void (*rte_dev_event_cb_fn)(char *device_name,
+ enum rte_dev_event_type event,
+ void *cb_arg);
+
__attribute__((format(printf, 2, 0)))
static inline void
rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
@@ -32,24 +51,25 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
va_start(ap, fmt);
- char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
+ {
+ char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
- va_end(ap);
+ va_end(ap);
- va_start(ap, fmt);
- vsnprintf(buffer, sizeof(buffer), fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ vsnprintf(buffer, sizeof(buffer), fmt, ap);
+ va_end(ap);
- rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer);
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s",
+ func_name, buffer);
+ }
}
/*
* Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
* RTE_*_RET() macros defined below is compiled in debug mode.
*/
-#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
- defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
- defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#if defined(RTE_LIBRTE_EVENTDEV_DEBUG)
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
@@ -154,6 +174,7 @@ struct rte_device {
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_attach(const char *name, const char *devargs);
/**
@@ -164,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_detach(struct rte_device *dev);
/**
@@ -263,8 +285,179 @@ __attribute__((used)) = str
static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
__attribute__((used)) = str
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching properties passed in parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching a criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
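
A hedged sketch of device iteration with RTE_DEV_FOREACH; the "bus=vdev" description string is an assumed example of the iterator syntax, and the print format is illustrative:

    #include <stdio.h>
    #include <rte_dev.h>

    /* walk all devices matching a description string */
    static void list_devices(void)
    {
        struct rte_dev_iterator it;
        struct rte_device *dev;

        RTE_DEV_FOREACH(dev, "bus=vdev", &it)
            printf("found device %s\n", dev->name);
    }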
#ifdef __cplusplus
}
#endif
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a callback for a specific device.
+ * Multiple callbacks can be registered at the same time.
+ *
+ * @param device_name
+ * The device name, i.e. the name field of struct rte_device;
+ * a NULL value means all devices.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister the callback for the specified device.
+ *
+ * @param device_name
+ * The device name, i.e. the name field of struct rte_device;
+ * a NULL value means all devices and their callbacks.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback; (void *)-1 removes all
+ * registered callbacks with the same callback address.
+ *
+ * @return
+ * - On success, return the number of callback entities removed.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_monitor_start(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_monitor_stop(void);
+
#endif /* _RTE_DEV_H_ */
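
A hedged sketch combining the device event callback registration and monitor start declared above; the callback body and error handling are illustrative:

    #include <stdio.h>
    #include <rte_dev.h>

    static void on_dev_event(char *device_name, enum rte_dev_event_type event,
            void *cb_arg)
    {
        (void)cb_arg;
        if (event == RTE_DEV_EVENT_ADD)
            printf("device %s added\n", device_name);
        else if (event == RTE_DEV_EVENT_REMOVE)
            printf("device %s removed\n", device_name);
    }

    static int setup_hotplug_monitor(void)
    {
        /* a NULL device name registers the callback for all devices */
        if (rte_dev_event_callback_register(NULL, on_dev_event, NULL) < 0)
            return -1;
        return rte_dev_event_monitor_start();
    }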
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 84e5e23c..097a4ce7 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,21 +51,23 @@ struct rte_devargs {
enum rte_devtype type;
/** Device policy. */
enum rte_dev_policy policy;
- /** Bus handle for the device. */
- struct rte_bus *bus;
/** Name of the device. */
char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
/** Arguments string as given by user or "" for no argument. */
- char *args;
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
};
-/** user device double-linked queue type definition */
-TAILQ_HEAD(rte_devargs_list, rte_devargs);
-
-/** Global list of user devices */
-extern struct rte_devargs_list devargs_list;
-
/**
+ * @deprecated
* Parse a devargs string.
*
* For PCI devices, the format of arguments string is "PCI_ADDR" or
@@ -90,6 +92,7 @@ extern struct rte_devargs_list devargs_list;
* - 0 on success
* - A negative value on error
*/
+__rte_deprecated
int rte_eal_parse_devargs_str(const char *devargs_str,
char **drvname, char **drvargs);
@@ -100,18 +103,73 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* in argument. Store which bus will handle the device, its name
* and the eventual device parameters.
*
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05:00.0,arg=val
+ * 05:00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
* @param dev
- * The device declaration string.
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
+
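
A minimal sketch of parsing a device string with rte_devargs_parse(), following the syntax described above; the PCI address and argument are illustrative:

    #include <rte_devargs.h>

    /* fill a devargs structure from a single device string */
    static int parse_one_device(struct rte_devargs *da)
    {
        return rte_devargs_parse(da, "pci:0000:05:00.0,arg=val");
    }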
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
+ * The device string is built with a printf-like syntax.
+ *
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05:00.0,arg=val
+ * 05:00.0,arg=val
+ * vdev:net_ring0
+ *
* @param da
* The devargs structure holding the device information.
+ * @param format
+ * Format string describing a device.
*
* @return
* - 0 on success.
* - Negative errno on error.
*/
-int __rte_experimental
-rte_eal_devargs_parse(const char *dev,
- struct rte_devargs *da);
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
+__attribute__((format(printf, 2, 0)));
/**
* Insert an rte_devargs in the global list.
@@ -123,21 +181,30 @@ rte_eal_devargs_parse(const char *dev,
* - 0 on success
* - Negative on error.
*/
-int __rte_experimental
-rte_eal_devargs_insert(struct rte_devargs *da);
+__rte_experimental
+int
+rte_devargs_insert(struct rte_devargs *da);
/**
* Add a device to the user device list
+ * See rte_devargs_parse() for details.
*
- * For PCI devices, the format of arguments string is "PCI_ADDR" or
- * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
- * "04:00.0,arg=val".
+ * @param devtype
+ * The type of the device.
+ * @param devargs_str
+ * The arguments as given by the user.
*
- * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
- * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the
- * driver name is not checked by this function, it is done when probing
- * the drivers.
+ * @return
+ * - 0 on success
+ * - A negative value on error
+ */
+__rte_experimental
+int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str);
+
+/**
+ * @deprecated
+ * Add a device to the user device list
+ * See rte_devargs_parse() for details.
*
* @param devtype
* The type of the device.
@@ -148,6 +215,7 @@ rte_eal_devargs_insert(struct rte_devargs *da);
* - 0 on success
* - A negative value on error
*/
+__rte_deprecated
int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
/**
@@ -166,10 +234,25 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
* <0 on error.
* >0 if the devargs was not within the user device list.
*/
-int __rte_experimental rte_eal_devargs_remove(const char *busname,
- const char *devname);
+__rte_experimental
+int rte_devargs_remove(const char *busname,
+ const char *devname);
+
+/**
+ * Count the number of user devices of a specified type
+ *
+ * @param devtype
+ * The type of the devices to be counted.
+ *
+ * @return
+ * The number of devices.
+ */
+__rte_experimental
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype);
/**
+ * @deprecated
* Count the number of user devices of a specified type
*
* @param devtype
@@ -178,6 +261,7 @@ int __rte_experimental rte_eal_devargs_remove(const char *busname,
* @return
* The number of devices.
*/
+__rte_deprecated
unsigned int
rte_eal_devargs_type_count(enum rte_devtype devtype);
@@ -187,8 +271,47 @@ rte_eal_devargs_type_count(enum rte_devtype devtype);
* @param f
* A pointer to a file for output
*/
+__rte_experimental
+void rte_devargs_dump(FILE *f);
+
+/**
+ * @deprecated
+ * This function dumps the list of user devices and their arguments.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+__rte_deprecated
void rte_eal_devargs_dump(FILE *f);
+/**
+ * Find next rte_devargs matching the provided bus name.
+ *
+ * @param busname
+ * Limit the iteration to devargs related to buses
+ * matching this name.
+ * Will return any next rte_devargs if NULL.
+ *
+ * @param start
+ * Starting iteration point. The iteration will start at
+ * the first rte_devargs if NULL.
+ *
+ * @return
+ * Next rte_devargs entry matching the requested bus,
+ * NULL if there is none.
+ */
+__rte_experimental
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start);
+
+/**
+ * Iterate over all rte_devargs for a specific bus.
+ */
+#define RTE_EAL_DEVARGS_FOREACH(busname, da) \
+ for (da = rte_devargs_next(busname, NULL); \
+ da != NULL; \
+ da = rte_devargs_next(busname, da)) \
+
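
A short sketch iterating the devargs recorded for one bus with RTE_EAL_DEVARGS_FOREACH; the bus name and print format are illustrative:

    #include <stdio.h>
    #include <rte_devargs.h>

    /* print every devargs entry known for the "pci" bus */
    static void dump_pci_devargs(void)
    {
        struct rte_devargs *da;

        RTE_EAL_DEVARGS_FOREACH("pci", da)
            printf("%s: %s\n", da->name, da->args);
    }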
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 044474e6..e114dcbd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -57,6 +57,8 @@ enum rte_proc_type_t {
struct rte_config {
uint32_t master_lcore; /**< Id of the master lcore */
uint32_t lcore_count; /**< Number of available logical cores. */
+ uint32_t numa_node_count; /**< Number of detected NUMA nodes. */
+ uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
uint32_t service_lcore_count;/**< Number of available service cores. */
enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
@@ -230,6 +232,16 @@ struct rte_mp_reply {
typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer);
/**
+ * Asynchronous reply function typedef used by other components.
+ *
+ * As we create a socket channel for primary/secondary communication, use
+ * this function typedef to register an action for incoming responses to
+ * asynchronous requests.
+ */
+typedef int (*rte_mp_async_reply_t)(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
@@ -314,13 +326,39 @@ rte_mp_sendmsg(struct rte_mp_msg *msg);
* - On failure, return -1, and the reason will be stored in rte_errno.
*/
int __rte_experimental
-rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
const struct timespec *ts);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Send a request to the peer process and expect a reply in a separate callback.
+ *
+ * This function sends a request message to the peer process, and will not
+ * block. Instead, reply will be received in a separate callback.
+ *
+ * @param req
+ * The req argument contains the customized request message.
+ *
+ * @param ts
+ * The ts argument specifies how long we can wait for the peer(s) to reply.
+ *
+ * @param clb
+ * The callback to trigger when all responses for this request have arrived.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+int __rte_experimental
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb);
+
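
A hedged sketch of an asynchronous multi-process request using the typedef and rte_mp_request_async() above; the action name, timeout, and reply check (which assumes the nb_received field of struct rte_mp_reply) are illustrative:

    #include <string.h>
    #include <time.h>
    #include <rte_eal.h>

    /* called once all peers have replied or the timeout expired */
    static int on_reply(const struct rte_mp_msg *request,
            const struct rte_mp_reply *reply)
    {
        (void)request;
        return reply->nb_received > 0 ? 0 : -1;
    }

    /* send a request without blocking the caller */
    static int send_ping(void)
    {
        struct rte_mp_msg msg;
        struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

        memset(&msg, 0, sizeof(msg));
        strcpy(msg.name, "example_ping");   /* hypothetical action name */
        return rte_mp_request_async(&msg, &ts, on_reply);
    }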
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Send a reply to the peer process.
*
* This function will send a reply message in response to a request message
@@ -452,25 +490,13 @@ static inline int rte_gettid(void)
enum rte_iova_mode rte_eal_iova_mode(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get user provided pool ops name for mbuf
*
* @return
* returns user provided pool ops name.
*/
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void);
-
-/**
- * Get default pool ops name for mbuf
- *
- * @return
- * returns default pool ops name.
- */
const char *
-rte_eal_mbuf_default_mempool_ops(void);
+rte_eal_mbuf_user_pool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 3f792a97..6eb49327 100644
--- a/lib/librte_eal/common/include/rte_eal_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -34,6 +34,7 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_ALARM, /**< alarm handle */
RTE_INTR_HANDLE_EXT, /**< external handler */
RTE_INTR_HANDLE_VDEV, /**< virtual device */
+ RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */
RTE_INTR_HANDLE_MAX /**< count of elements */
};
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 29fa0b60..aff0688d 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -12,12 +12,31 @@
#include <rte_malloc_heap.h>
#include <rte_rwlock.h>
#include <rte_pause.h>
+#include <rte_fbarray.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
+ * The memseg list is a special case, as we need to store additional data
+ * together with the array itself.
+ */
+struct rte_memseg_list {
+ RTE_STD_C11
+ union {
+ void *base_va;
+ /**< Base virtual address for this memseg list. */
+ uint64_t addr_64;
+ /**< Makes sure addr is always 64-bits */
+ };
+ int socket_id; /**< Socket ID for all memsegs in this list. */
+ uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ volatile uint32_t version; /**< version number for multiprocess sync. */
+ struct rte_fbarray memseg_arr;
+};
+
+/**
* the structure for the memory configuration for the RTE.
* Used by the rte_config structure. It is separated out, as for multi-process
* support, the memory details should be shared across instances
@@ -40,11 +59,14 @@ struct rte_mem_config {
rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */
rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */
- uint32_t memzone_cnt; /**< Number of allocated memzones */
+ rte_rwlock_t memory_hotplug_lock;
+ /**< indicates whether memory hotplug request is in progress. */
/* memory segments and zones */
- struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */
- struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+ struct rte_fbarray memzones; /**< Memzone descriptors. */
+
+ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
+ /**< list of dynamic arrays holding memsegs */
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
new file mode 100644
index 00000000..5d880551
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef RTE_FBARRAY_H
+#define RTE_FBARRAY_H
+
+/**
+ * @file
+ *
+ * File-backed shared indexed array for DPDK.
+ *
+ * Basic workflow is expected to be the following (see the usage sketch
+ * after this comment):
+ * 1) Allocate array either using ``rte_fbarray_init()`` or
+ * ``rte_fbarray_attach()`` (depending on whether it's shared between
+ * multiple DPDK processes)
+ * 2) find free spots using ``rte_fbarray_find_next_free()``
+ * 3) get pointer to data in the free spot using ``rte_fbarray_get()``, and
+ * copy data into the pointer (element size is fixed)
+ * 4) mark entry as used using ``rte_fbarray_set_used()``
+ *
+ * Calls to ``rte_fbarray_init()`` and ``rte_fbarray_destroy()`` will have
+ * consequences for all processes, while calls to ``rte_fbarray_attach()`` and
+ * ``rte_fbarray_detach()`` will only have consequences within a single process.
+ * Therefore, it is safe to call ``rte_fbarray_attach()`` or
+ * ``rte_fbarray_detach()`` while another process is using ``rte_fbarray``,
+ * provided no other thread within the same process will try to use
+ * ``rte_fbarray`` before attaching or after detaching. It is not safe to call
+ * ``rte_fbarray_init()`` or ``rte_fbarray_destroy()`` while another thread or
+ * another process is using ``rte_fbarray``.
+ */
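
A minimal single-process usage sketch of the workflow described in the header comment above; the array name, length, and element type are illustrative assumptions:

    #include <rte_fbarray.h>

    struct my_elem { int value; };   /* hypothetical element type */

    /* single-process setup; sharing would require arr itself to live in
     * shared memory, as the header comment explains */
    static int setup(struct rte_fbarray *arr)
    {
        return rte_fbarray_init(arr, "example_fbarray", 64,
                sizeof(struct my_elem));
    }

    /* find a free slot, fill it, and mark it used */
    static int store_value(struct rte_fbarray *arr, int value)
    {
        struct my_elem *e;
        int idx = rte_fbarray_find_next_free(arr, 0);

        if (idx < 0)
            return -1;
        e = rte_fbarray_get(arr, (unsigned int)idx);
        if (e == NULL)
            return -1;
        e->value = value;
        return rte_fbarray_set_used(arr, (unsigned int)idx);
    }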
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_compat.h>
+#include <rte_rwlock.h>
+
+#define RTE_FBARRAY_NAME_LEN 64
+
+struct rte_fbarray {
+ char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
+ unsigned int count; /**< number of entries stored */
+ unsigned int len; /**< current length of the array */
+ unsigned int elt_sz; /**< size of each element */
+ void *data; /**< data pointer */
+ rte_rwlock_t rwlock; /**< multiprocess lock */
+};
+
+/**
+ * Set up ``rte_fbarray`` structure and allocate underlying resources.
+ *
+ * Call this function to correctly set up ``rte_fbarray`` and allocate
+ * underlying files that will be backing the data in the current process. Note
+ * that in order to use and share ``rte_fbarray`` between multiple processes,
+ * data pointed to by ``arr`` pointer must itself be allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated ``rte_fbarray`` structure.
+ *
+ * @param name
+ * Unique name to be assigned to this array.
+ *
+ * @param len
+ * Number of elements initially available in the array.
+ *
+ * @param elt_sz
+ * Size of each element.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz);
+
+
+/**
+ * Attach to a file backing an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to attach to file that will be backing the data in the
+ * current process. The structure must have been previously correctly set up
+ * with a call to ``rte_fbarray_init()``. Calls to ``rte_fbarray_attach()`` are
+ * usually meant to be performed in a multiprocessing scenario, with data
+ * pointed to by ``arr`` pointer allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up rte_fbarray structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure, and remove the underlying file.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within the current process. This will also
+ * zero-fill data pointed to by ``arr`` pointer and remove the underlying file
+ * backing the data, so it is expected that by the time this function is called,
+ * all other processes have detached from this ``rte_fbarray``.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within current process.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr);
+
+
+/**
+ * Get pointer to element residing at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Index of an element to get a pointer to.
+ *
+ * @return
+ * - non-NULL pointer on success.
+ * - NULL on failure, with ``rte_errno`` indicating reason for failure.
+ */
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of a specified element within the array.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param elt
+ * Pointer to element to find index to.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt);
+
+
+/**
+ * Mark specified element as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as used.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Mark specified element as free.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as free.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Check whether element at specified index is marked as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to check as used.
+ *
+ * @return
+ * - 1 if element is used.
+ * - 0 if element is unused.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of next free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next chunk of ``n`` free elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find index of next chunk of ``n`` used elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Dump ``rte_fbarray`` metadata.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param f
+ * File object to dump information into.
+ */
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FBARRAY_H */
diff --git a/lib/librte_eal/common/include/rte_hypervisor.h b/lib/librte_eal/common/include/rte_hypervisor.h
index 8d8aac74..5fe719c1 100644
--- a/lib/librte_eal/common/include/rte_hypervisor.h
+++ b/lib/librte_eal/common/include/rte_hypervisor.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#ifndef RTE_HYPERVISOR_H
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 04722203..6e09d918 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -119,7 +119,7 @@ rte_lcore_index(int lcore_id)
if (lcore_id >= RTE_MAX_LCORE)
return -1;
if (lcore_id < 0)
- lcore_id = rte_lcore_id();
+ lcore_id = (int)rte_lcore_id();
return lcore_config[lcore_id].core_index;
}
@@ -132,6 +132,36 @@ rte_lcore_index(int lcore_id)
unsigned rte_socket_id(void);
/**
+ * Return number of physical sockets detected on the system.
+ *
+ * Note that the number of nodes may not correspond to their physical IDs:
+ * for example, a system may report two sockets whose actual socket IDs
+ * are 0 and 8.
+ *
+ * @return
+ * the number of physical sockets as recognized by EAL
+ */
+unsigned int __rte_experimental
+rte_socket_count(void);
+
+/**
+ * Return socket id with a particular index.
+ *
+ * This will return socket id at a particular position in list of all detected
+ * physical socket id's. For example, on a machine with sockets [0, 8], passing
+ * 1 as a parameter will return 8.
+ *
+ * @param idx
+ * index of physical socket id to return
+ *
+ * @return
+ * - physical socket id as recognized by EAL
+ * - -1 on error, with errno set to EINVAL
+ */
+int __rte_experimental
+rte_socket_id_by_idx(unsigned int idx);
+
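
A short sketch enumerating physical sockets with the two functions above; the print format is illustrative:

    #include <stdio.h>
    #include <rte_lcore.h>

    /* print each physical socket ID detected by EAL */
    static void print_sockets(void)
    {
        unsigned int i, n = rte_socket_count();

        for (i = 0; i < n; i++)
            printf("physical socket %d\n", rte_socket_id_by_idx(i));
    }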
+/**
* Get the ID of the physical socket of the specified lcore
*
* @param lcore_id
@@ -247,6 +277,32 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
int rte_thread_setname(pthread_t id, const char *name);
/**
+ * Create a control thread.
+ *
+ * Wrapper to pthread_create(), pthread_setname_np() and
+ * pthread_setaffinity_np(). The dataplane and service lcores are
+ * excluded from the affinity of the new thread.
+ *
+ * @param thread
+ * Filled with the thread id of the new created thread.
+ * @param name
+ * The name of the control thread (max 16 characters including '\0').
+ * @param attr
+ * Attributes for the new thread.
+ * @param start_routine
+ * Function to be executed by the new thread.
+ * @param arg
+ * Argument passed to start_routine.
+ * @return
+ * On success, returns 0; on error, it returns a negative value
+ * corresponding to the error number.
+ */
+__rte_experimental int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
+
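
A hedged sketch of spawning a control thread with rte_ctrl_thread_create(); the thread body and name are illustrative:

    #include <pthread.h>
    #include <unistd.h>
    #include <rte_lcore.h>

    static void *housekeeping(void *arg)
    {
        (void)arg;
        sleep(1);   /* illustrative background work */
        return NULL;
    }

    static int start_housekeeping(void)
    {
        pthread_t tid;

        /* name is limited to 16 characters including the terminating '\0' */
        return rte_ctrl_thread_create(&tid, "housekeeping", NULL,
                housekeeping, NULL);
    }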
+/**
* Test if the core supplied has a specific role
*
* @param lcore_id
@@ -255,7 +311,7 @@ int rte_thread_setname(pthread_t id, const char *name);
* @param role
* The role to be checked against.
* @return
- * On success, return 0; otherwise return a negative value.
+ * Boolean value: positive if test is true; otherwise returns 0.
*/
int
rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index 9029c785..2f789cb9 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -20,6 +20,7 @@ extern "C" {
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
+#include <sys/queue.h>
#include <rte_common.h>
#include <rte_config.h>
@@ -129,16 +130,28 @@ uint32_t rte_log_get_global_level(void);
int rte_log_get_level(uint32_t logtype);
/**
- * Set the log level for a given type.
+ * Set the log level for a given type based on shell pattern.
*
* @param pattern
- * The regexp identifying the log type.
+ * The match pattern identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_pattern(const char *pattern, uint32_t level);
+
+/**
+ * Set the log level for a given type based on regular expression.
+ *
+ * @param regex
+ * The regular expression identifying the log type.
* @param level
* The level to be set.
* @return
* 0 on success, a negative value if level is invalid.
*/
-int rte_log_set_level_regexp(const char *pattern, uint32_t level);
+int rte_log_set_level_regexp(const char *regex, uint32_t level);
/**
* Set the log level for a given type.
@@ -195,6 +208,27 @@ int rte_log_cur_msg_logtype(void);
int rte_log_register(const char *name);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a dynamic log type and try to pick its level from EAL options
+ *
+ * rte_log_register() is called inside. If successful, the function tries
+ * to search for matching regexp in the list of EAL log level options and
+ * pick the level from the last matching entry. If nothing can be applied
+ * from the list, the level will be set to the user-defined default value.
+ *
+ * @param name
+ * Name for the log type to be registered
+ * @param level_def
+ * Fallback level to be set if the global list has no matching options
+ * @return
+ * - >=0: the newly registered log type
+ * - <0: rte_log_register() error value
+ */
+int rte_log_register_type_and_pick_level(const char *name, uint32_t level_def);
+
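
A minimal sketch of registering a dynamic log type with a fallback level; the log type name and fallback are illustrative:

    #include <rte_log.h>

    /* hypothetical component log type; INFO is used when no EAL
     * --log-level option matches the name */
    static int my_logtype;

    static void init_logging(void)
    {
        my_logtype = rte_log_register_type_and_pick_level("pmd.net.example",
                RTE_LOG_INFO);
        if (my_logtype < 0)
            my_logtype = RTE_LOGTYPE_USER1;
    }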
+/**
* Dump log information.
*
* Dump the global level and the registered log types.
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index f02a8ba1..a9fb7e45 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stddef.h>
+#include <rte_compat.h>
#include <rte_memory.h>
#ifdef __cplusplus
@@ -278,6 +279,15 @@ void
rte_malloc_dump_stats(FILE *f, const char *type);
/**
+ * Dump contents of all malloc heaps to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void __rte_experimental
+rte_malloc_dump_heaps(FILE *f);
+
+/**
* Set the maximum amount of allocated memory for this type.
*
* This is not yet implemented
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index ba99ed90..d43fa909 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -13,12 +13,18 @@
/* Number of free lists per heap, grouped by size. */
#define RTE_HEAP_NUM_FREELISTS 13
+/* dummy definition, for pointers */
+struct malloc_elem;
+
/**
* Structure to hold malloc heap
*/
struct malloc_heap {
rte_spinlock_t lock;
LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+ struct malloc_elem *volatile first;
+ struct malloc_elem *volatile last;
+
unsigned alloc_count;
size_t total_size;
} __rte_cache_aligned;
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 302f865b..c4b7f4cf 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -20,8 +20,12 @@ extern "C" {
#endif
#include <rte_common.h>
+#include <rte_compat.h>
#include <rte_config.h>
+/* forward declaration for pointers */
+struct rte_memseg_list;
+
__extension__
enum rte_page_sizes {
RTE_PGSIZE_4K = 1ULL << 12,
@@ -79,6 +83,8 @@ typedef uint64_t rte_iova_t;
/**
* Physical memory segment descriptor.
*/
+#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
+/**< Prevent this segment from being freed back to the OS. */
struct rte_memseg {
RTE_STD_C11
union {
@@ -95,6 +101,7 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
+ uint32_t flags; /**< Memseg-specific flags */
} __rte_packed;
/**
@@ -130,25 +137,192 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
rte_iova_t rte_mem_virt2iova(const void *virt);
/**
- * Get the layout of the available physical memory.
+ * Get virtual memory address corresponding to iova address.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param iova
+ * The iova address.
+ * @return
+ * Virtual address corresponding to iova address (or NULL if address does not
+ * exist within DPDK memory map).
+ */
+__rte_experimental void *
+rte_mem_iova2virt(rte_iova_t iova);
+
+/**
+ * Get memseg to which a particular virtual address belongs.
+ *
+ * @param virt
+ * The virtual address.
+ * @param msl
+ * The memseg list in which to look up based on ``virt`` address
+ * (can be NULL).
+ * @return
+ * Memseg pointer on success, or NULL on error.
+ */
+__rte_experimental struct rte_memseg *
+rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
+
+/**
+ * Get memseg list corresponding to virtual memory address.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * Memseg list to which this virtual address belongs to.
+ */
+__rte_experimental struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *virt);
+
+/**
+ * Memseg walk function prototype.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg);
+
+/**
+ * Memseg contig walk function prototype. This will trigger a callback on every
+ * VA-contiguous area starting at memseg ``ms``, so the total valid VA space at each
+ * callback call will be [``ms->addr``, ``ms->addr + len``).
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg);
+
+/**
+ * Memseg list walk function prototype. This will trigger a callback on every
+ * allocated memseg list.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
+ void *arg);
+
+/**
+ * Walk list of all memsegs.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg);
+
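
A hedged sketch of a memseg walk that sums segment lengths; it assumes the len field of struct rte_memseg and is purely illustrative:

    #include <stdint.h>
    #include <rte_memory.h>

    static int count_seg(const struct rte_memseg_list *msl,
            const struct rte_memseg *ms, void *arg)
    {
        uint64_t *total = arg;

        (void)msl;
        *total += ms->len;
        return 0;   /* 0 continues the walk */
    }

    static uint64_t total_memseg_len(void)
    {
        uint64_t total = 0;

        rte_memseg_walk(count_seg, &total);
        return total;
    }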
+/**
+ * Walk each VA-contiguous area.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
*
- * It can be useful for an application to have the full physical
- * memory layout to decide the size of a memory zone to reserve. This
- * table is stored in rte_config (see rte_eal_get_configuration()).
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
*
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
* @return
- * - On success, return a pointer to a read-only table of struct
- * rte_physmem_desc elements, containing the layout of all
- * addressable physical memory. The last element of the table
- * contains a NULL address.
- * - On error, return NULL. This should not happen since it is a fatal
- * error that will probably cause the entire system to panic.
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
*/
-const struct rte_memseg *rte_eal_get_physmem_layout(void);
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
/**
* Dump the physical memory layout to a file.
*
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
* @param f
* A pointer to a file for output
*/
@@ -157,6 +331,9 @@ void rte_dump_physmem_layout(FILE *f);
/**
* Get the total amount of available physical memory.
*
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
* @return
* The total amount of available physical memory in bytes.
*/
@@ -191,6 +368,137 @@ unsigned rte_memory_get_nrank(void);
*/
int rte_eal_using_phys_addrs(void);
+
+/**
+ * Enum indicating which kind of memory event has happened. Used by callbacks to
+ * distinguish between memory allocations and deallocations.
+ */
+enum rte_mem_event {
+ RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
+ RTE_MEM_EVENT_FREE, /**< Deallocation event. */
+};
+#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
+/**< maximum length of callback name */
+
+/**
+ * Function typedef used to register callbacks for memory events.
+ */
+typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
+ const void *addr, size_t len, void *arg);
+
+/**
+ * Function used to register callbacks for memory events.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note mem event callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param arg
+ * Argument to pass to the callback.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg);
+
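
A minimal sketch of a memory event callback; the callback name and printed message are illustrative, and the body deliberately avoids memseg walks because the hotplug lock is held while callbacks run:

    #include <stdio.h>
    #include <rte_memory.h>

    static void on_mem_event(enum rte_mem_event event_type, const void *addr,
            size_t len, void *arg)
    {
        (void)addr;
        (void)arg;
        printf("%s of %zu bytes\n",
                event_type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", len);
    }

    static void register_mem_cb(void)
    {
        /* ENOTSUP (callbacks unsupported) is an expected error condition */
        if (rte_mem_event_callback_register("example_cb", on_mem_event,
                NULL) < 0)
            printf("memory event callbacks not supported\n");
    }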
+/**
+ * Function used to unregister callbacks for memory events.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param arg
+ * Argument to look for among callbacks with specified callback name.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_event_callback_unregister(const char *name, void *arg);
+
+
+#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
+/**< maximum length of alloc validator name */
+/**
+ * Function typedef used to register memory allocation validation callbacks.
+ *
+ * Returning 0 will allow allocation attempt to continue. Returning -1 will
+ * prevent allocation from succeeding.
+ */
+typedef int (*rte_mem_alloc_validator_t)(int socket_id,
+ size_t cur_limit, size_t new_len);
+
+/**
+ * @brief Register validator callback for memory allocations.
+ *
+ * Callbacks registered by this function will be called right before memory
+ * allocator is about to trigger allocation of more pages from the system if
+ * said allocation will bring total memory usage above specified limit on
+ * specified socket. User will be able to cancel pending allocation if callback
+ * returns -1.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note validator callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @param limit
+ * Limit above which to trigger callbacks.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
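
A hedged sketch of an allocation validator; the policy, callback name, socket ID and limit values are illustrative:

    #include <rte_memory.h>

    /* reject any growth beyond the configured limit; purely illustrative */
    static int limit_socket0(int socket_id, size_t cur_limit, size_t new_len)
    {
        (void)socket_id;
        return new_len > cur_limit ? -1 : 0;
    }

    static void install_validator(void)
    {
        /* 1 GB limit on socket 0 */
        rte_mem_alloc_validator_register("example_limit", limit_socket0, 0,
                1UL << 30);
    }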
+/**
+ * @brief Unregister validator callback for memory allocations.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_alloc_validator_unregister(const char *name, int socket_id);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index 2bfb2731..f478fa9e 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -23,6 +23,7 @@
*/
#include <stdio.h>
+#include <rte_compat.h>
#include <rte_memory.h>
#include <rte_common.h>
@@ -39,6 +40,7 @@ extern "C" {
#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */
#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */
#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */
+#define RTE_MEMZONE_IOVA_CONTIG 0x00100000 /**< Ask for IOVA-contiguous memzone. */
/**
* A structure describing a memzone, which is a contiguous portion of
@@ -66,7 +68,6 @@ struct rte_memzone {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t flags; /**< Characteristics of this memzone. */
- uint32_t memseg_id; /**< Memseg it belongs. */
} __attribute__((__packed__));
/**
@@ -76,6 +77,17 @@ struct rte_memzone {
* correctly filled memzone descriptor. If the allocation cannot be
* done, return NULL.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -102,6 +114,9 @@ struct rte_memzone {
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @return
* A pointer to a correctly-filled read-only memzone descriptor, or NULL
* on error.
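
A minimal sketch of reserving an IOVA-contiguous memzone with the new flag; the zone name and size are illustrative:

    #include <rte_memzone.h>

    /* IOVA-contiguous memory suitable for a hardware ring */
    static const struct rte_memzone *reserve_ring_mem(void)
    {
        return rte_memzone_reserve("example_ring", 4096, SOCKET_ID_ANY,
                RTE_MEMZONE_IOVA_CONTIG);
    }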
@@ -126,6 +141,17 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* descriptor. If the allocation cannot be done or if the alignment
* is not a power of 2, returns NULL.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -152,6 +178,9 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @param align
* Alignment for resulting memzone. Must be a power of 2.
* @return
@@ -181,6 +210,17 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* boundary. That implies that requested length should be less or equal
* then boundary.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -207,6 +247,9 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @param align
* Alignment for resulting memzone. Must be a power of 2.
* @param bound
diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
index 08222510..e12c2208 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
@@ -1,59 +1,5 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#ifndef _RTE_PCI_DEV_DEFS_H_
diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h
index 67b986a6..6104123d 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_features.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_features.h
@@ -1,59 +1,5 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#ifndef _RTE_PCI_DEV_FEATURES_H
diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h
index 63bb2808..b2ca1c20 100644
--- a/lib/librte_eal/common/include/rte_random.h
+++ b/lib/librte_eal/common/include/rte_random.h
@@ -31,7 +31,7 @@ extern "C" {
static inline void
rte_srand(uint64_t seedval)
{
- srand48((long unsigned int)seedval);
+ srand48((long)seedval);
}
/**
@@ -48,9 +48,9 @@ static inline uint64_t
rte_rand(void)
{
uint64_t val;
- val = lrand48();
+ val = (uint64_t)lrand48();
val <<= 32;
- val += lrand48();
+ val += (uint64_t)lrand48();
return val;
}
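
As a brief aside, an assumed usage sketch of the two helpers above: seed once, then draw 64-bit values (the seed constant is arbitrary):

    #include <rte_random.h>

    static uint64_t
    draw_sample(void)
    {
        rte_srand(0xdeadbeef);  /* seeds the underlying lrand48() state */
        return rte_rand();      /* two lrand48() draws packed into 64 bits */
    }
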
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index 211eb376..34b41aff 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -47,9 +47,6 @@ extern "C" {
#define RTE_SERVICE_CAP_MT_SAFE (1 << 0)
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the number of services registered.
*
* The number of services registered can be passed to *rte_service_get_by_id*,
@@ -57,12 +54,9 @@ extern "C" {
*
* @return The number of services registered.
*/
-uint32_t __rte_experimental rte_service_get_count(void);
+uint32_t rte_service_get_count(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the id of a service by name.
*
* This function provides the id of the service using the service name as
@@ -84,24 +78,17 @@ uint32_t __rte_experimental rte_service_get_count(void);
* @retval -EINVAL Null *service_id* pointer provided
* @retval -ENODEV No such service registered
*/
-int32_t __rte_experimental rte_service_get_by_name(const char *name,
- uint32_t *service_id);
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the name of the service.
*
* @return A pointer to the name of the service. The returned pointer remains
* in ownership of the service, and the application must not free it.
*/
-const char __rte_experimental *rte_service_get_name(uint32_t id);
+const char *rte_service_get_name(uint32_t id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Check if a service has a specific capability.
*
 * This function returns whether *service* implements *capability*.
@@ -109,13 +96,9 @@ const char __rte_experimental *rte_service_get_name(uint32_t id);
* @retval 1 Capability supported by this service instance
* @retval 0 Capability not supported by this service instance
*/
-int32_t __rte_experimental rte_service_probe_capability(uint32_t id,
- uint32_t capability);
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Map or unmap a lcore to a service.
*
* Each core can be added or removed from running a specific service. This
@@ -134,13 +117,10 @@ int32_t __rte_experimental rte_service_probe_capability(uint32_t id,
* @retval 0 lcore map updated successfully
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id,
- uint32_t lcore, uint32_t enable);
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the mapping of an lcore to a service.
*
* @param service_id the service to apply the lcore to
@@ -150,13 +130,9 @@ int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id,
* @retval 0 lcore is not mapped to service
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id,
- uint32_t lcore);
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Set the runstate of the service.
*
* Each service is either running or stopped. Setting a non-zero runstate
@@ -168,12 +144,9 @@ int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id,
* @retval 0 The service was successfully started
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runstate);
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get the runstate for the service with *id*. See *rte_service_runstate_set*
* for details of runstates. A service can call this function to ensure that
* the application has indicated that it will receive CPU cycles. Either a
@@ -186,12 +159,29 @@ int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runsta
* @retval 0 Service is stopped
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_runstate_get(uint32_t id);
+int32_t rte_service_runstate_get(uint32_t id);
/**
* @warning
- * @b EXPERIMENTAL: this API may change without prior notice
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing on a service lcore.
*
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id);
+
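+
As an aside, a hedged sketch of the stop-and-drain pattern this new call enables; the service id is assumed to come from rte_service_get_by_name():

    #include <rte_service.h>
    #include <rte_pause.h>

    static void
    service_stop_and_drain(uint32_t sid)
    {
        rte_service_runstate_set(sid, 0);        /* request stop */
        while (rte_service_may_be_active(sid) == 1)
            rte_pause();    /* a service lcore may still be mid-iteration */
    }
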
+/**
* Enable or disable the check for a service-core being mapped to the service.
 * An application can disable the check when it takes the responsibility to run a
* service itself using *rte_service_run_iter_on_app_lcore*.
@@ -202,13 +192,9 @@ int32_t __rte_experimental rte_service_runstate_get(uint32_t id);
* @retval 0 Success
* @retval -EINVAL Invalid service ID
*/
-int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id,
- int32_t enable);
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* This function runs a service callback from a non-service lcore.
*
* This function is designed to enable gradual porting to service cores, and
@@ -241,13 +227,10 @@ int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id,
* @retval -ENOEXEC Service is not in a run-able state
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_multithread_unsafe);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Start a service core.
*
* Starting a core makes the core begin polling. Any services assigned to it
@@ -259,12 +242,9 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
* @retval -EINVAL Failed to start core. The *lcore_id* passed in is not
* currently assigned to be a service core.
*/
-int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id);
+int32_t rte_service_lcore_start(uint32_t lcore_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Stop a service core.
*
* Stopping a core makes the core become idle, but remains assigned as a
@@ -278,12 +258,9 @@ int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id);
* The application must stop the service first, and then stop the
* lcore.
*/
-int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id);
+int32_t rte_service_lcore_stop(uint32_t lcore_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Adds lcore to the list of service cores.
*
 * This function can be used at runtime in order to modify the service core
@@ -294,12 +271,9 @@ int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id);
* @retval -EALREADY lcore is already added to the service core list
* @retval -EINVAL Invalid lcore provided
*/
-int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore);
+int32_t rte_service_lcore_add(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Removes lcore from the list of service cores.
*
* This can fail if the core is not stopped, see *rte_service_core_stop*.
@@ -308,12 +282,9 @@ int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore);
* @retval -EBUSY Lcore is not stopped, stop service core before removing.
* @retval -EINVAL failed to add lcore to service core mask.
*/
-int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore);
+int32_t rte_service_lcore_del(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the number of service cores currently available.
*
* This function returns the integer count of service cores available. The
@@ -325,24 +296,18 @@ int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore);
*
* @return The number of service cores currently configured.
*/
-int32_t __rte_experimental rte_service_lcore_count(void);
+int32_t rte_service_lcore_count(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Resets all service core mappings. This does not remove the service cores
* from duty, just unmaps all services / cores, and stops() the service cores.
* The runstate of services is not modified.
*
* @retval 0 Success
*/
-int32_t __rte_experimental rte_service_lcore_reset_all(void);
+int32_t rte_service_lcore_reset_all(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Enable or disable statistics collection for *service*.
*
* This function enables per core, per-service cycle count collection.
@@ -351,13 +316,9 @@ int32_t __rte_experimental rte_service_lcore_reset_all(void);
* @retval 0 Success
* @retval -EINVAL Invalid service pointer passed
*/
-int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id,
- int32_t enable);
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the list of currently enabled service cores.
*
* This function fills in an application supplied array, with each element
@@ -373,12 +334,9 @@ int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id,
* service core list. No items have been populated, call this function
* with a size of at least *rte_service_core_count* items.
*/
-int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n);
+int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
 * Get the number of services running on the supplied lcore.
*
* @param lcore Id of the service core.
@@ -386,19 +344,16 @@ int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n);
* @retval -EINVAL Invalid lcore provided
* @retval -ENOTSUP The provided lcore is not a service core.
*/
-int32_t __rte_experimental rte_service_lcore_count_services(uint32_t lcore);
+int32_t rte_service_lcore_count_services(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Dumps any information available about the service. When id is UINT32_MAX,
* this function dumps info for all services.
*
* @retval 0 Statistics have been successfully dumped
* @retval -EINVAL Invalid service id provided
*/
-int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id);
+int32_t rte_service_dump(FILE *f, uint32_t id);
/**
* Returns the number of cycles that this service has consumed
@@ -411,28 +366,58 @@ int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id);
#define RTE_SERVICE_ATTR_CALL_COUNT 1
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get an attribute from a service.
*
* @retval 0 Success, the attribute value has been written to *attr_value*.
* -EINVAL Invalid id, attr_id or attr_value was NULL.
*/
-int32_t __rte_experimental rte_service_attr_get(uint32_t id, uint32_t attr_id,
+int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
uint32_t *attr_value);
/**
+ * Reset all attribute values of a service.
+ *
+ * @param id The service to reset all statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid service id provided
+ */
+int32_t rte_service_attr_reset_all(uint32_t id);
+
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Reset all attribute values of a service.
+ * Get an attribute from a service core.
*
- * @param id The service to reset all statistics of
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
* @retval 0 Successfully reset attributes
* -EINVAL Invalid service id provided
+ * -ENOTSUP lcore is not a service core.
*/
-int32_t __rte_experimental rte_service_attr_reset_all(uint32_t id);
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore);
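
As an aside, a small sketch of reading and resetting the new per-lcore statistics; lcore 1 is assumed to already be a service core:

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_service.h>

    static void
    dump_service_lcore_loops(void)
    {
        uint64_t loops = 0;

        if (rte_service_lcore_attr_get(1, RTE_SERVICE_LCORE_ATTR_LOOPS,
                &loops) == 0)
            printf("service lcore 1 looped %" PRIu64 " times\n", loops);
        rte_service_lcore_attr_reset_all(1);
    }
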
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
index 9ba4aa29..c12adbc2 100644
--- a/lib/librte_eal/common/include/rte_service_component.h
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -13,17 +13,11 @@
#include <rte_service.h>
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Signature of callback function to run a service.
*/
typedef int32_t (*rte_service_func)(void *args);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* The specification of a service.
*
* This struct contains metadata about the service itself, the callback
@@ -47,9 +41,6 @@ struct rte_service_spec {
};
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Register a new service.
*
 * A service represents a component that requires CPU time periodically to
@@ -73,14 +64,10 @@ struct rte_service_spec {
* -EINVAL Attempted to register an invalid service (eg, no callback
* set)
*/
-int32_t __rte_experimental
-rte_service_component_register(const struct rte_service_spec *spec,
- uint32_t *service_id);
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Unregister a service component.
*
* The service being removed must be stopped before calling this function.
@@ -89,12 +76,9 @@ rte_service_component_register(const struct rte_service_spec *spec,
* @retval -EBUSY The service is currently running, stop the service before
* calling unregister. No action has been taken.
*/
-int32_t __rte_experimental rte_service_component_unregister(uint32_t id);
+int32_t rte_service_component_unregister(uint32_t id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
 * Private function to allow EAL to initialize default mappings.
*
 * This function iterates over all the services, and maps them to the available
@@ -107,12 +91,9 @@ int32_t __rte_experimental rte_service_component_unregister(uint32_t id);
* @retval -ENODEV Error in enabling service lcore on a service
* @retval -ENOEXEC Error when starting services
*/
-int32_t __rte_experimental rte_service_start_with_defaults(void);
+int32_t rte_service_start_with_defaults(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Set the backend runstate of a component.
*
* This function allows services to be registered at startup, but not yet
@@ -124,13 +105,9 @@ int32_t __rte_experimental rte_service_start_with_defaults(void);
*
* @retval 0 Success
*/
-int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id,
- uint32_t runstate);
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Initialize the service library.
*
* In order to use the service library, it must be initialized. EAL initializes
@@ -142,14 +119,11 @@ int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id,
int32_t rte_service_init(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* @internal Free up the memory that has been initialized.
* This routine is to be invoked prior to process termination.
*
* @retval None
*/
-void __rte_experimental rte_service_finalize(void);
+void rte_service_finalize(void);
#endif /* _RTE_SERVICE_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h
index e97047a4..97597a14 100644
--- a/lib/librte_eal/common/include/rte_string_fns.h
+++ b/lib/librte_eal/common/include/rte_string_fns.h
@@ -15,6 +15,8 @@
extern "C" {
#endif
+#include <stdio.h>
+
/**
* Takes string "string" parameter and splits it at character "delim"
* up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -45,6 +47,35 @@ int
rte_strsplit(char *string, int stringlen,
char **tokens, int maxtokens, char delim);
+/**
+ * @internal
+ * DPDK-specific version of strlcpy for systems without
+ * libc or libbsd copies of the function
+ */
+static inline size_t
+rte_strlcpy(char *dst, const char *src, size_t size)
+{
+ return (size_t)snprintf(dst, size, "%s", src);
+}
+
+/* pull in a strlcpy function */
+#ifdef RTE_EXEC_ENV_BSDAPP
+#include <string.h>
+#ifndef __BSD_VISIBLE /* non-standard functions are hidden */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+#endif
+
+
+#else /* non-BSD platforms */
+#ifdef RTE_USE_LIBBSD
+#include <bsd/string.h>
+
+#else /* no BSD header files, create own */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+
+#endif /* RTE_USE_LIBBSD */
+#endif /* BSDAPP */
+
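
In effect, after this block an application can call strlcpy() unconditionally on any supported platform; a minimal sketch (the name being copied is a placeholder):

    #include <rte_string_fns.h>

    static void
    copy_name(char *dst, size_t dst_size)
    {
        /* resolves to the libc/libbsd strlcpy where available,
         * and to rte_strlcpy() otherwise
         */
        strlcpy(dst, "example_name", dst_size);
    }
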
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index 8dccaefc..9b01abb2 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-RTE_INIT(tailqinitfn_ ##t); \
-static void tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h
new file mode 100644
index 00000000..2c846b5f
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_uuid.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universally Unique Identifier (UUID)
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ * true if uuid is NULL value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUID's
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ * returns an integer less than, equal to, or greater than zero if UUID a
+ * is less than, equal to, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ * Returns 0 on success, and -1 if the string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ * Size of the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
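
As an aside, a small usage sketch of the new UUID helpers; the UUID string is arbitrary:

    #include <stdio.h>
    #include <rte_uuid.h>

    static void
    uuid_roundtrip(void)
    {
        rte_uuid_t uu;
        char buf[RTE_UUID_STRLEN];

        if (rte_uuid_parse("123e4567-e89b-12d3-a456-426614174000", uu) == 0) {
            rte_uuid_unparse(uu, buf, sizeof(buf));
            printf("parsed UUID: %s\n", buf);
        }
    }
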
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 8173802b..7c6714a2 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 02
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index e981a622..5ca13fcc 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -5,6 +5,15 @@
#ifndef _RTE_VFIO_H_
#define _RTE_VFIO_H_
+/**
+ * @file
+ * RTE VFIO. This library provides various VFIO related utility functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/*
* determine if VFIO is present on the system
*/
@@ -28,6 +37,20 @@
#define VFIO_NOIOMMU_MODE \
"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+/* NOIOMMU is defined from kernel version 4.5 onwards */
+#ifdef VFIO_NOIOMMU_IOMMU
+#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
+#else
+#define RTE_VFIO_NOIOMMU 8
+#endif
+
+#else /* not VFIO_PRESENT */
+
+/* we don't need an actual definition, only pointer is used */
+struct vfio_device_info;
+
+#endif /* VFIO_PRESENT */
+
/**
* Setup vfio_cfg for the device identified by its address.
* It discovers the configured I/O MMU groups or sets a new one for the device.
@@ -119,10 +142,226 @@ int rte_vfio_is_enabled(const char *modname);
*/
int rte_vfio_noiommu_is_enabled(void);
-/* remove group fd from internal VFIO group fd array */
+/**
+ * Remove group fd from the internal VFIO group fd array.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param vfio_group_fd
+ * VFIO group fd.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
int
rte_vfio_clear_group(int vfio_group_fd);
-#endif /* VFIO_PRESENT */
+/**
+ * Map memory region for use with VFIO.
+ *
+ * @note Requires at least one device to be attached at the time of
+ * mapping. DMA maps done via this API will only apply to the default
+ * container and will not apply to any of the containers created
+ * via rte_vfio_container_create().
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+int
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+
+/**
+ * Unmap memory region from VFIO.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+
+int
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+/**
+ * Parse IOMMU group number for a device
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * >0 on success
+ * 0 for a non-existent group or if VFIO is not available
+ * <0 for errors
+ */
+int
+rte_vfio_get_group_num(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_num);
+
+/**
+ * Open VFIO container fd or get an existing one
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * > 0 container fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_container_fd(void);
+
+/**
+ * Open VFIO group fd or get an existing one
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * > 0 group fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_group_fd(int iommu_group_num);
+
+/**
+ * Create a new container for device binding.
+ *
+ * @note Any newly allocated DPDK memory will not be mapped into these
+ * containers by default; the user needs to manage DMA mappings for
+ * any container created by this API.
+ *
+ * @return
+ * the container fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_create(void);
+
+/**
+ * Destroy the container, unbind all vfio groups within it.
+ *
+ * @param container_fd
+ * the container fd to destroy
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_destroy(int container_fd);
+
+/**
+ * Bind an IOMMU group to a container.
+ *
+ * @param container_fd
+ * the container's fd
+ *
+ * @param iommu_group_num
+ * the iommu group number to bind to container
+ *
+ * @return
+ * group fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
+
+/**
+ * Unbind an IOMMU group from a container.
+ *
+ * @param container_fd
+ * the container fd of container
+ *
+ * @param iommu_group_num
+ * the iommu group number to delete from container
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
+
+/**
+ * Perform DMA mapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+/**
+ * Perform DMA unmapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
#endif /* _RTE_VFIO_H_ */
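
As an aside, a hedged sketch of how the new container API chains together; the sysfs base, PCI address, and the vaddr/iova/len triple describing an application-managed buffer are all illustrative placeholders:

    #include <rte_vfio.h>

    static void
    vfio_container_example(uint64_t vaddr, uint64_t iova, uint64_t len)
    {
        int iommu_group_num;
        int cfd = rte_vfio_container_create();

        if (cfd < 0)
            return;
        if (rte_vfio_get_group_num("/sys/bus/pci/devices", "0000:01:00.0",
                &iommu_group_num) > 0) {
            rte_vfio_container_group_bind(cfd, iommu_group_num);
            rte_vfio_container_dma_map(cfd, vaddr, iova, len);
            /* device DMA against the mapped region would happen here */
            rte_vfio_container_dma_unmap(cfd, vaddr, iova, len);
        }
        rte_vfio_container_destroy(cfd);
    }
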
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 0cadc8af..e0a8ed15 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -1,10 +1,12 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#include <sys/queue.h>
#include <rte_memory.h>
@@ -16,21 +18,100 @@
#include <rte_common.h>
#include <rte_spinlock.h>
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
-#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous.
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
/*
* Initialize a general malloc_elem header structure
*/
void
-malloc_elem_init(struct malloc_elem *elem,
- struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
+malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
+ struct rte_memseg_list *msl, size_t size)
{
elem->heap = heap;
- elem->ms = ms;
+ elem->msl = msl;
elem->prev = NULL;
+ elem->next = NULL;
memset(&elem->free_list, 0, sizeof(elem->free_list));
elem->state = ELEM_FREE;
elem->size = size;
@@ -39,15 +120,74 @@ malloc_elem_init(struct malloc_elem *elem,
set_trailer(elem);
}
+void
+malloc_elem_insert(struct malloc_elem *elem)
+{
+ struct malloc_elem *prev_elem, *next_elem;
+ struct malloc_heap *heap = elem->heap;
+
+ /* first and last elements must be both NULL or both non-NULL */
+ if ((heap->first == NULL) != (heap->last == NULL)) {
+ RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
+ return;
+ }
+
+ if (heap->first == NULL && heap->last == NULL) {
+ /* if empty heap */
+ heap->first = elem;
+ heap->last = elem;
+ prev_elem = NULL;
+ next_elem = NULL;
+ } else if (elem < heap->first) {
+ /* if lower than start */
+ prev_elem = NULL;
+ next_elem = heap->first;
+ heap->first = elem;
+ } else if (elem > heap->last) {
+ /* if higher than end */
+ prev_elem = heap->last;
+ next_elem = NULL;
+ heap->last = elem;
+ } else {
+		/* the new memory is somewhere in between start and end */
+ uint64_t dist_from_start, dist_from_end;
+
+ dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+ dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+ /* check which is closer, and find closest list entries */
+ if (dist_from_start < dist_from_end) {
+ prev_elem = heap->first;
+ while (prev_elem->next < elem)
+ prev_elem = prev_elem->next;
+ next_elem = prev_elem->next;
+ } else {
+ next_elem = heap->last;
+ while (next_elem->prev > elem)
+ next_elem = next_elem->prev;
+ prev_elem = next_elem->prev;
+ }
+ }
+
+ /* insert new element */
+ elem->prev = prev_elem;
+ elem->next = next_elem;
+ if (prev_elem)
+ prev_elem->next = elem;
+ if (next_elem)
+ next_elem->prev = elem;
+}
+
/*
- * Initialize a dummy malloc_elem header for the end-of-memseg marker
+ * Attempt to find enough physically contiguous memory in this block to store
+ * our data. Assume that the element has at least enough space to fit the data,
+ * so we just check the page addresses.
*/
-void
-malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+static bool
+elem_check_phys_contig(const struct rte_memseg_list *msl,
+ void *start, size_t size)
{
- malloc_elem_init(elem, prev->heap, prev->ms, 0);
- elem->prev = prev;
- elem->state = ELEM_BUSY; /* mark busy so its never merged */
+ return eal_memalloc_is_contig(msl, start, size);
}
/*
@@ -57,27 +197,59 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
*/
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- const size_t bmask = ~(bound - 1);
- uintptr_t end_pt = (uintptr_t)elem +
- elem->size - MALLOC_ELEM_TRAILER_LEN;
- uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
- uintptr_t new_elem_start;
-
- /* check boundary */
- if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
- end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
- new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
- end_pt = new_data_start + size;
- if (((end_pt - 1) & bmask) != (new_data_start & bmask))
- return NULL;
- }
+ size_t elem_size = elem->size;
+
+ /*
+ * we're allocating from the end, so adjust the size of element by
+ * alignment size.
+ */
+ while (elem_size >= size) {
+ const size_t bmask = ~(bound - 1);
+ uintptr_t end_pt = (uintptr_t)elem +
+ elem_size - MALLOC_ELEM_TRAILER_LEN;
+ uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ uintptr_t new_elem_start;
+
+ /* check boundary */
+ if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+ end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+ new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ end_pt = new_data_start + size;
+
+ if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+ return NULL;
+ }
+
+ new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
- new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+ /* if the new start point is before the exist start,
+ * it won't fit
+ */
+ if (new_elem_start < (uintptr_t)elem)
+ return NULL;
- /* if the new start point is before the exist start, it won't fit */
- return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+ if (contig) {
+ size_t new_data_size = end_pt - new_data_start;
+
+ /*
+ * if physical contiguousness was requested and we
+ * couldn't fit all data into one physically contiguous
+ * block, try again with lower addresses.
+ */
+ if (!elem_check_phys_contig(elem->msl,
+ (void *)new_data_start,
+ new_data_size)) {
+ elem_size -= align;
+ continue;
+ }
+ }
+ return (void *)new_elem_start;
+ }
+ return NULL;
}
/*
@@ -86,9 +258,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
*/
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- return elem_start_pt(elem, size, align, bound) != NULL;
+ return elem_start_pt(elem, size, align, bound, contig) != NULL;
}
/*
@@ -98,18 +270,58 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
- struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+ struct malloc_elem *next_elem = elem->next;
const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
const size_t new_elem_size = elem->size - old_elem_size;
- malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
+ malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size);
split_pt->prev = elem;
- next_elem->prev = split_pt;
+ split_pt->next = next_elem;
+ if (next_elem)
+ next_elem->prev = split_pt;
+ else
+ elem->heap->last = split_pt;
+ elem->next = split_pt;
elem->size = old_elem_size;
set_trailer(elem);
}
/*
+ * our malloc heap is a doubly linked list, so doubly remove our element.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
+ struct malloc_elem *next, *prev;
+ next = elem->next;
+ prev = elem->prev;
+
+ if (next)
+ next->prev = prev;
+ else
+ elem->heap->last = prev;
+ if (prev)
+ prev->next = next;
+ else
+ elem->heap->first = next;
+
+ elem->prev = NULL;
+ elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem->next == RTE_PTR_ADD(elem, elem->size);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem == RTE_PTR_ADD(elem->prev, elem->prev->size);
+}
+
+/*
* Given an element size, compute its freelist index.
* We free an element into the freelist containing similarly-sized elements.
* We try to allocate elements starting with the freelist containing
@@ -162,8 +374,8 @@ malloc_elem_free_list_insert(struct malloc_elem *elem)
/*
* Remove the specified element from its heap's free list.
*/
-static void
-elem_free_list_remove(struct malloc_elem *elem)
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem)
{
LIST_REMOVE(elem, free_list);
}
@@ -176,14 +388,15 @@ elem_free_list_remove(struct malloc_elem *elem)
*/
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+ struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
+ contig);
const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
const size_t trailer_size = elem->size - old_elem_size - size -
MALLOC_ELEM_OVERHEAD;
- elem_free_list_remove(elem);
+ malloc_elem_free_list_remove(elem);
if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
/* split it, too much free space after elem */
@@ -192,6 +405,9 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
split_elem(elem, new_free_elem);
malloc_elem_free_list_insert(new_free_elem);
+
+ if (elem == elem->heap->last)
+ elem->heap->last = new_free_elem;
}
if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
@@ -230,9 +446,66 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
- struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+ struct malloc_elem *next = elem2->next;
elem1->size += elem2->size;
- next->prev = elem1;
+ if (next)
+ next->prev = elem1;
+ else
+ elem1->heap->last = elem1;
+ elem1->next = next;
+}
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem)
+{
+ /*
+ * check if next element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+ next_elem_is_adjacent(elem)) {
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase the trailer and header */
+ erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
+ }
+
+ /*
+ * check if prev element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+ prev_elem_is_adjacent(elem)) {
+ struct malloc_elem *new_elem;
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase trailer and header */
+ erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->prev);
+
+ new_elem = elem->prev;
+ join_elem(new_elem, elem);
+
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
+
+ elem = new_elem;
+ }
+
+ return elem;
}
/*
@@ -240,43 +513,74 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
* blocks either immediately before or immediately after newly freed block
* are also free, the blocks are merged together.
*/
-int
+struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
- if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
- return -1;
+ void *ptr;
+ size_t data_len;
- rte_spinlock_lock(&(elem->heap->lock));
- size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
- uint8_t *ptr = (uint8_t *)&elem[1];
- struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
- if (next->state == ELEM_FREE){
- /* remove from free list, join to this one */
- elem_free_list_remove(next);
- join_elem(elem, next);
- sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- }
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+ elem = malloc_elem_join_adjacent_free(elem);
- /* check if previous element is free, if so join with it and return,
- * need to re-insert in free list, as that element's size is changing
- */
- if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
- elem_free_list_remove(elem->prev);
- join_elem(elem->prev, elem);
- sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- elem = elem->prev;
- }
malloc_elem_free_list_insert(elem);
+ elem->pad = 0;
+
/* decrease heap's count of allocated elements */
elem->heap->alloc_count--;
- memset(ptr, 0, sz);
+ memset(ptr, 0, data_len);
- rte_spinlock_unlock(&(elem->heap->lock));
+ return elem;
+}
- return 0;
+/* assume all checks were already done */
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
+{
+ struct malloc_elem *hide_start, *hide_end, *prev, *next;
+ size_t len_before, len_after;
+
+ hide_start = start;
+ hide_end = RTE_PTR_ADD(start, len);
+
+ prev = elem->prev;
+ next = elem->next;
+
+ /* we cannot do anything with non-adjacent elements */
+ if (next && next_elem_is_adjacent(elem)) {
+ len_after = RTE_PTR_DIFF(next, hide_end);
+ if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split after */
+ split_elem(elem, hide_end);
+
+ malloc_elem_free_list_insert(hide_end);
+ } else if (len_after > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ /* we cannot do anything with non-adjacent elements */
+ if (prev && prev_elem_is_adjacent(elem)) {
+ len_before = RTE_PTR_DIFF(hide_start, elem);
+ if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split before */
+ split_elem(elem, hide_start);
+
+ prev = elem;
+ elem = hide_start;
+
+ malloc_elem_free_list_insert(prev);
+ } else if (len_before > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ remove_elem(elem);
}
/*
@@ -287,22 +591,23 @@ int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
/* if we request a smaller size, then always return ok */
if (elem->size >= new_size)
return 0;
- struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
- rte_spinlock_lock(&elem->heap->lock);
- if (next ->state != ELEM_FREE)
- goto err_return;
- if (elem->size + next->size < new_size)
- goto err_return;
+ /* check if there is a next element, it's free and adjacent */
+ if (!elem->next || elem->next->state != ELEM_FREE ||
+ !next_elem_is_adjacent(elem))
+ return -1;
+ if (elem->size + elem->next->size < new_size)
+ return -1;
/* we now know the element fits, so remove from free list,
* join the two
*/
- elem_free_list_remove(next);
- join_elem(elem, next);
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
/* now we have a big block together. Lets cut it down a bit, by splitting */
@@ -311,10 +616,28 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
split_elem(elem, split_pt);
malloc_elem_free_list_insert(split_pt);
}
- rte_spinlock_unlock(&elem->heap->lock);
return 0;
+}
-err_return:
- rte_spinlock_unlock(&elem->heap->lock);
- return -1;
+static inline const char *
+elem_state_to_str(enum elem_state state)
+{
+ switch (state) {
+ case ELEM_PAD:
+ return "PAD";
+ case ELEM_BUSY:
+ return "BUSY";
+ case ELEM_FREE:
+ return "FREE";
+ }
+ return "ERROR";
+}
+
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
+{
+ fprintf(f, "Malloc element at %p (%s)\n", elem,
+ elem_state_to_str(elem->state));
+ fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
+ fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
}
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index f4c1c7a9..e2bda4c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -5,7 +5,11 @@
#ifndef MALLOC_ELEM_H_
#define MALLOC_ELEM_H_
-#include <rte_memory.h>
+#include <stdbool.h>
+
+#include <rte_eal_memconfig.h>
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
struct malloc_heap;
@@ -18,9 +22,13 @@ enum elem_state {
struct malloc_elem {
struct malloc_heap *heap;
- struct malloc_elem *volatile prev; /* points to prev elem in memseg */
- LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */
- const struct rte_memseg *ms;
+ struct malloc_elem *volatile prev;
+ /**< points to prev elem in memseg */
+ struct malloc_elem *volatile next;
+ /**< points to next elem in memseg */
+ LIST_ENTRY(malloc_elem) free_list;
+ /**< list of free elements in heap */
+ struct rte_memseg_list *msl;
volatile enum elem_state state;
uint32_t pad;
size_t size;
@@ -107,15 +115,11 @@ malloc_elem_from_data(const void *data)
void
malloc_elem_init(struct malloc_elem *elem,
struct malloc_heap *heap,
- const struct rte_memseg *ms,
+ struct rte_memseg_list *msl,
size_t size);
-/*
- * initialise a dummy malloc_elem header for the end-of-memseg marker
- */
void
-malloc_elem_mkend(struct malloc_elem *elem,
- struct malloc_elem *prev_free);
+malloc_elem_insert(struct malloc_elem *elem);
/*
* return true if the current malloc_elem can hold a block of data
@@ -123,7 +127,7 @@ malloc_elem_mkend(struct malloc_elem *elem,
*/
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
- unsigned align, size_t bound);
+ unsigned int align, size_t bound, bool contig);
/*
* reserve a block of data in an existing malloc_elem. If the malloc_elem
@@ -131,16 +135,19 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
*/
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size,
- unsigned align, size_t bound);
+ unsigned int align, size_t bound, bool contig);
/*
* free a malloc_elem block by adding it to the free list. If the
* blocks either immediately before or immediately after newly freed block
* are also free, the blocks are merged together.
*/
-int
+struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem);
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem);
+
/*
* attempt to resize a malloc_elem by expanding into any free space
* immediately after it in memory.
@@ -148,6 +155,18 @@ malloc_elem_free(struct malloc_elem *elem);
int
malloc_elem_resize(struct malloc_elem *elem, size_t size);
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len);
+
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem);
+
+/*
+ * dump contents of malloc elem to a file.
+ */
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f);
+
/*
* Given an element size, compute its freelist index.
*/
@@ -160,4 +179,10 @@ malloc_elem_free_list_index(size_t size);
void
malloc_elem_free_list_insert(struct malloc_elem *elem);
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 7aafc880..12aaf2d7 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -10,6 +10,7 @@
#include <sys/queue.h>
#include <rte_memory.h>
+#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_launch.h>
@@ -20,9 +21,13 @@
#include <rte_spinlock.h>
#include <rte_memcpy.h>
#include <rte_atomic.h>
+#include <rte_fbarray.h>
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
+#include "malloc_mp.h"
static unsigned
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
@@ -62,26 +67,51 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
}
/*
- * Expand the heap with a memseg.
- * This reserves the zone and sets a dummy malloc_elem header at the end
- * to prevent overflow. The rest of the zone is added to free list as a single
- * large free block
+ * Expand the heap with a memory area.
*/
-static void
-malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
+static struct malloc_elem *
+malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
+ void *start, size_t len)
+{
+ struct malloc_elem *elem = start;
+
+ malloc_elem_init(elem, heap, msl, len);
+
+ malloc_elem_insert(elem);
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ return elem;
+}
+
+static int
+malloc_add_seg(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
- /* allocate the memory block headers, one at end, one at start */
- struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
- struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
- ms->len - MALLOC_ELEM_OVERHEAD);
- end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
- const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct malloc_heap *heap;
+ int msl_idx;
+
+ heap = &mcfg->malloc_heaps[msl->socket_id];
+
+ /* msl is const, so find it */
+ msl_idx = msl - mcfg->memsegs;
+
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
- malloc_elem_init(start_elem, heap, ms, elem_size);
- malloc_elem_mkend(end_elem, start_elem);
- malloc_elem_free_list_insert(start_elem);
+ found_msl = &mcfg->memsegs[msl_idx];
- heap->total_size += elem_size;
+ malloc_heap_add_memory(heap, found_msl, ms->addr, len);
+
+ heap->total_size += len;
+
+ RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
+ msl->socket_id);
+ return 0;
}
/*
@@ -92,7 +122,7 @@ malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
*/
static struct malloc_elem *
find_suitable_element(struct malloc_heap *heap, size_t size,
- unsigned flags, size_t align, size_t bound)
+ unsigned int flags, size_t align, size_t bound, bool contig)
{
size_t idx;
struct malloc_elem *elem, *alt_elem = NULL;
@@ -101,8 +131,10 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
idx < RTE_HEAP_NUM_FREELISTS; idx++) {
for (elem = LIST_FIRST(&heap->free_head[idx]);
!!elem; elem = LIST_NEXT(elem, free_list)) {
- if (malloc_elem_can_hold(elem, size, align, bound)) {
- if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+ if (malloc_elem_can_hold(elem, size, align, bound,
+ contig)) {
+ if (check_hugepage_sz(flags,
+ elem->msl->page_sz))
return elem;
if (alt_elem == NULL)
alt_elem = elem;
@@ -117,34 +149,770 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
}
/*
+ * Iterates through the freelist for a heap to find a free element with the
+ * biggest size and requested alignment. Will also set size to whatever element
+ * size that was found.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
* Main function to allocate a block of memory from the heap.
* It locks the free list, scans it, and adds a new memseg if the
* scan fails. Once the new memseg is added, it re-scans and should return
* the new element after releasing the lock.
*/
-void *
-malloc_heap_alloc(struct malloc_heap *heap,
- const char *type __attribute__((unused)), size_t size, unsigned flags,
- size_t align, size_t bound)
+static void *
+heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
{
struct malloc_elem *elem;
size = RTE_CACHE_LINE_ROUNDUP(size);
align = RTE_CACHE_LINE_ROUNDUP(align);
- rte_spinlock_lock(&heap->lock);
+ elem = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, bound, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
- elem = find_suitable_element(heap, size, flags, align, bound);
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
if (elem != NULL) {
- elem = malloc_elem_alloc(elem, size, align, bound);
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
/* increase heap's count of allocated elements */
heap->alloc_count++;
}
- rte_spinlock_unlock(&heap->lock);
return elem == NULL ? NULL : (void *)(&elem[1]);
}
+/* this function is exposed in malloc_mp.h */
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len)
+{
+ if (elem != NULL) {
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, map_addr, map_len);
+ }
+
+ eal_memalloc_free_seg_bulk(ms, n_segs);
+}
+
+/* this function is exposed in malloc_mp.h */
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs)
+{
+ struct rte_memseg_list *msl;
+ struct malloc_elem *elem = NULL;
+ size_t alloc_sz;
+ int allocd_pages;
+ void *ret, *map_addr;
+
+ alloc_sz = (size_t)pg_sz * n_segs;
+
+ /* first, check if we're allowed to allocate this memory */
+ if (eal_memalloc_mem_alloc_validate(socket,
+ heap->total_size + alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n");
+ return NULL;
+ }
+
+ allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
+ socket, true);
+
+ /* make sure we've allocated our pages... */
+ if (allocd_pages < 0)
+ return NULL;
+
+ map_addr = ms[0]->addr;
+ msl = rte_mem_virt2memseg_list(map_addr);
+
+ /* check if we wanted contiguous memory but didn't get it */
+ if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+ __func__);
+ goto fail;
+ }
+
+ /* add newly minted memsegs to malloc heap */
+ elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
+
+ /* try once more, as now we have allocated new memory */
+ ret = find_suitable_element(heap, elt_size, flags, align, bound,
+ contig);
+
+ if (ret == NULL)
+ goto fail;
+
+ return elem;
+
+fail:
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+ return NULL;
+}
+
+static int
+try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ void *map_addr;
+ size_t alloc_sz;
+ int n_segs;
+ bool callback_triggered = false;
+
+ alloc_sz = RTE_ALIGN_CEIL(align + elt_size +
+ MALLOC_ELEM_TRAILER_LEN, pg_sz);
+ n_segs = alloc_sz / pg_sz;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL)
+ return -1;
+
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
+ bound, contig, ms, n_segs);
+
+ if (elem == NULL)
+ goto free_ms;
+
+ map_addr = ms[0]->addr;
+
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* notify other processes that this has happened */
+ if (request_sync()) {
+ /* we couldn't ensure all processes have mapped memory,
+ * so free it back and notify everyone that it's been
+ * freed back.
+ *
+ * technically, we could've avoided adding memory addresses to
+ * the map, but that would've led to inconsistent behavior
+ * between primary and secondary processes, as those get
+ * callbacks during sync. therefore, force primary process to
+ * do alloc-and-rollback syncs as well.
+ */
+ callback_triggered = true;
+ goto free_elem;
+ }
+ heap->total_size += alloc_sz;
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
+ socket, alloc_sz >> 20ULL);
+
+ free(ms);
+
+ return 0;
+
+free_elem:
+ if (callback_triggered)
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ map_addr, alloc_sz);
+
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+
+ request_sync();
+free_ms:
+ free(ms);
+
+ return -1;
+}
+
+static int
+try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_mp_req req;
+ int req_result;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_ALLOC;
+ req.alloc_req.align = align;
+ req.alloc_req.bound = bound;
+ req.alloc_req.contig = contig;
+ req.alloc_req.flags = flags;
+ req.alloc_req.elt_size = elt_size;
+ req.alloc_req.page_sz = pg_sz;
+ req.alloc_req.socket = socket;
+ req.alloc_req.heap = heap; /* it's in shared memory */
+
+ req_result = request_to_primary(&req);
+
+ if (req_result != 0)
+ return -1;
+
+ if (req.result != REQ_RESULT_SUCCESS)
+ return -1;
+
+ return 0;
+}
+
+static int
+try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret;
+
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ } else {
+ ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ }
+
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+ return ret;
+}
+
+static int
+compare_pagesz(const void *a, const void *b)
+{
+ const struct rte_memseg_list * const*mpa = a;
+ const struct rte_memseg_list * const*mpb = b;
+ const struct rte_memseg_list *msla = *mpa;
+ const struct rte_memseg_list *mslb = *mpb;
+ uint64_t pg_sz_a = msla->page_sz;
+ uint64_t pg_sz_b = mslb->page_sz;
+
+ if (pg_sz_a < pg_sz_b)
+ return -1;
+ if (pg_sz_a > pg_sz_b)
+ return 1;
+ return 0;
+}
+
+static int
+alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
+ struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
+ uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t prev_pg_sz;
+ int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
+ bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ memset(requested_msls, 0, sizeof(requested_msls));
+ memset(other_msls, 0, sizeof(other_msls));
+ memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
+ memset(other_pg_sz, 0, sizeof(other_pg_sz));
+
+ /*
+ * go through memseg list and take note of all the page sizes available,
+ * and if any of them were specifically requested by the user.
+ */
+ n_requested_msls = 0;
+ n_other_msls = 0;
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->socket_id != socket)
+ continue;
+
+ if (msl->base_va == NULL)
+ continue;
+
+ /* if pages of specific size were requested */
+ if (size_flags != 0 && check_hugepage_sz(size_flags,
+ msl->page_sz))
+ requested_msls[n_requested_msls++] = msl;
+ else if (size_flags == 0 || size_hint)
+ other_msls[n_other_msls++] = msl;
+ }
+
+ /* sort the lists, smallest first */
+ qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
+ compare_pagesz);
+ qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
+ compare_pagesz);
+
+ /* now, extract page sizes we are supposed to try */
+ prev_pg_sz = 0;
+ n_requested_pg_sz = 0;
+ for (i = 0; i < n_requested_msls; i++) {
+ uint64_t pg_sz = requested_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ requested_pg_sz[n_requested_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+ prev_pg_sz = 0;
+ n_other_pg_sz = 0;
+ for (i = 0; i < n_other_msls; i++) {
+ uint64_t pg_sz = other_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ other_pg_sz[n_other_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+
+ /* finally, try allocating memory of specified page sizes, starting from
+ * the smallest sizes
+ */
+ for (i = 0; i < n_requested_pg_sz; i++) {
+ uint64_t pg_sz = requested_pg_sz[i];
+
+ /*
+ * do not pass the size hint here, as user expects other page
+ * sizes first, before resorting to best effort allocation.
+ */
+ if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
+ align, bound, contig))
+ return 0;
+ }
+ if (n_other_pg_sz == 0)
+ return -1;
+
+ /* now, check if we can reserve anything with size hint */
+ ret = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (ret != NULL)
+ return 0;
+
+ /*
+ * we still couldn't reserve memory, so try expanding heap with other
+ * page sizes, if there are any
+ */
+ for (i = 0; i < n_other_pg_sz; i++) {
+ uint64_t pg_sz = other_pg_sz[i];
+
+ if (!try_expand_heap(heap, pg_sz, size, socket, flags,
+ align, bound, contig))
+ return 0;
+ }
+ return -1;
+}
+
+/* this will try lower page sizes first */
+static void *
+heap_alloc_on_socket(const char *type, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ /* for legacy mode, try once and with all flags */
+ if (internal_config.legacy_mem) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+ goto alloc_unlock;
+ }
+
+ /*
+ * we do not pass the size hint here, because even if allocation fails,
+ * we may still be able to allocate memory from appropriate page sizes;
+ * we just need to request more memory first.
+ */
+ ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
+ if (ret != NULL)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
+ contig)) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+
+ /* this should have succeeded */
+ if (ret == NULL)
+ RTE_LOG(ERR, EAL, "Error allocating from heap\n");
+ }
+alloc_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket_arg,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_on_socket(type, size, cur_socket, flags,
+ align, bound, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+static void *
+heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
+ align, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+/* this function is exposed in malloc_mp.h */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
+{
+ int n_segs, seg_idx, max_seg_idx;
+ struct rte_memseg_list *msl;
+ size_t page_sz;
+
+ msl = rte_mem_virt2memseg_list(aligned_start);
+ if (msl == NULL)
+ return -1;
+
+ page_sz = (size_t)msl->page_sz;
+ n_segs = aligned_len / page_sz;
+ seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
+ max_seg_idx = seg_idx + n_segs;
+
+ for (; seg_idx < max_seg_idx; seg_idx++) {
+ struct rte_memseg *ms;
+
+ ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
+ eal_memalloc_free_seg(ms);
+ }
+ return 0;
+}
+
+int
+malloc_heap_free(struct malloc_elem *elem)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap;
+ void *start, *aligned_start, *end, *aligned_end;
+ size_t len, aligned_len, page_sz;
+ struct rte_memseg_list *msl;
+ unsigned int i, n_segs, before_space, after_space;
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ /* elem may be merged with previous element, so keep heap address */
+ heap = elem->heap;
+ msl = elem->msl;
+ page_sz = (size_t)msl->page_sz;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ /* mark element as free */
+ elem->state = ELEM_FREE;
+
+ elem = malloc_elem_free(elem);
+
+ /* anything after this is a bonus */
+ ret = 0;
+
+ /* ...but we cannot take advantage of it in legacy mode */
+ if (internal_config.legacy_mem)
+ goto free_unlock;
+
+ /* check if we can free any memory back to the system */
+ if (elem->size < page_sz)
+ goto free_unlock;
+
+ /* probably, but let's make sure, as we may not be using up full page */
+ start = elem;
+ len = elem->size;
+ aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
+ end = RTE_PTR_ADD(elem, len);
+ aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
+
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+
+ /* can't free anything */
+ if (aligned_len < page_sz)
+ goto free_unlock;
+
+ /* we can free something. however, some of these pages may be marked as
+ * unfreeable, so also check that as well
+ */
+ n_segs = aligned_len / page_sz;
+ for (i = 0; i < n_segs; i++) {
+ const struct rte_memseg *tmp =
+ rte_mem_virt2memseg(aligned_start, msl);
+
+ if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ /* this is an unfreeable segment, so move start */
+ aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
+ }
+ }
+
+ /* recalculate length and number of segments */
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+ n_segs = aligned_len / page_sz;
+
+ /* check if we can still free some pages */
+ if (n_segs == 0)
+ goto free_unlock;
+
+ /* We're not done yet. We also have to check if by freeing space we will
+ * be leaving free elements that are too small to store new elements.
+ * Check if we have enough space in the beginning and at the end, or if
+ * start/end are exactly page aligned.
+ */
+ before_space = RTE_PTR_DIFF(aligned_start, elem);
+ after_space = RTE_PTR_DIFF(end, aligned_end);
+ if (before_space != 0 &&
+ before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space before start, but we may be able to
+ * move the start forward by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move start */
+ aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+ if (after_space != 0 && after_space <
+ MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space after end, but we may be able to
+ * move the end backwards by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move end */
+ aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+
+ /* now we can finally free us some pages */
+
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ /*
+ * we allow secondary processes to clear the heap of this allocated
+ * memory because it is safe to do so: even if notifications about
+ * unmapped pages don't make it to other processes, the heap is shared
+ * across all processes, so this memory will disappear from it anyway,
+ * and nothing can allocate it back unless the primary process manages
+ * to deliver an allocation message to every single running process.
+ */
+
+ malloc_elem_free_list_remove(elem);
+
+ malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
+
+ heap->total_size -= aligned_len;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ aligned_start, aligned_len);
+
+ /* don't care if any of this fails */
+ malloc_heap_free_pages(aligned_start, aligned_len);
+
+ request_sync();
+ } else {
+ struct malloc_mp_req req;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_FREE;
+ req.free_req.addr = aligned_start;
+ req.free_req.len = aligned_len;
+
+ /*
+ * we request primary to deallocate pages, but we don't do it
+ * in this thread. instead, we notify primary that we would like
+ * to deallocate pages, and this process will receive another
+ * request (in parallel) that will do it for us on another
+ * thread.
+ *
+ * we also don't really care if this succeeds - the data is
+ * already removed from the heap, so it is, for all intents and
+ * purposes, hidden from the rest of DPDK even if some other
+ * process (including this one) may have these pages mapped.
+ *
+ * notifications about deallocated memory happen during sync.
+ */
+ request_to_primary(&req);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n",
+ msl->socket_id, aligned_len >> 20ULL);
+
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+free_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size)
+{
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ rte_spinlock_lock(&(elem->heap->lock));
+
+ ret = malloc_elem_resize(elem, size);
+
+ rte_spinlock_unlock(&(elem->heap->lock));
+
+ return ret;
+}
+
/*
* Function to retrieve data for heap on given socket
*/
@@ -183,21 +951,49 @@ malloc_heap_get_stats(struct malloc_heap *heap,
return 0;
}
+/*
+ * Function to dump contents of a heap to a file
+ */
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f)
+{
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
+ fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
+
+ elem = heap->first;
+ while (elem) {
+ malloc_elem_dump(elem, f);
+ elem = elem->next;
+ }
+
+ rte_spinlock_unlock(&heap->lock);
+}
+
int
rte_eal_malloc_heap_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- unsigned ms_cnt;
- struct rte_memseg *ms;
- if (mcfg == NULL)
+ if (register_mp_requests()) {
+ RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return -1;
-
- for (ms = &mcfg->memseg[0], ms_cnt = 0;
- (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
- ms_cnt++, ms++) {
- malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
}
- return 0;
+ /* unlock mem hotplug here. it's safe for primary as no requests can
+ * even come before primary itself is fully initialized, and secondaries
+ * do not need to initialize the heap.
+ */
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ /* secondary process does not need to initialize anything */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ /* add all IOVA-contiguous areas to the heap */
+ return rte_memseg_contig_walk(malloc_add_seg, NULL);
}
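
Heap initialization above no longer iterates a fixed memseg array; it hands malloc_add_seg() to rte_memseg_contig_walk(), which invokes the callback once per IOVA-contiguous area. Below is a minimal sketch of a callback with the same shape, shown only to illustrate the walk interface assumed here (the helper name is hypothetical):

    #include <rte_common.h>
    #include <rte_log.h>
    #include <rte_memory.h>

    /* called once per IOVA-contiguous area, same shape as malloc_add_seg() */
    static int
    log_contig_area(const struct rte_memseg_list *msl,
            const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
    {
        RTE_LOG(DEBUG, EAL, "socket %i: %p, %zu bytes\n",
                msl->socket_id, ms->addr, len);
        return 0; /* returning non-zero stops the walk early */
    }

    /* usage: rte_memseg_contig_walk(log_contig_area, NULL); */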
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index e0defa70..f52cb555 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -5,6 +5,8 @@
#ifndef MALLOC_HEAP_H_
#define MALLOC_HEAP_H_
+#include <stdbool.h>
+
#include <rte_malloc.h>
#include <rte_malloc_heap.h>
@@ -24,13 +26,26 @@ malloc_get_numa_socket(void)
}
void *
-malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size,
- unsigned flags, size_t align, size_t bound);
+malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
+ size_t align, size_t bound, bool contig);
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
+int
+malloc_heap_free(struct malloc_elem *elem);
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size);
int
malloc_heap_get_stats(struct malloc_heap *heap,
struct rte_malloc_socket_stats *socket_stats);
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f);
+
int
rte_eal_malloc_heap_init(void);
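
The prototypes above capture the core interface change: callers now pass a socket id and a contiguity flag instead of a heap pointer, and allocation may grow the heap on demand. A minimal internal-caller sketch, assuming only the declarations in this header plus SOCKET_ID_ANY from rte_memory.h (the wrapper name is hypothetical):

    #include <stdbool.h>
    #include <stddef.h>
    #include <rte_memory.h> /* SOCKET_ID_ANY */
    #include "malloc_heap.h"

    static void *
    alloc_contig_1m(void)
    {
        /* 1 MiB, any socket, no page-size flags, default alignment,
         * no boundary constraint, IOVA-contiguous */
        return malloc_heap_alloc(NULL, 1 << 20, SOCKET_ID_ANY, 0, 0, 0, true);
    }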
diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c
new file mode 100644
index 00000000..931c14bc
--- /dev/null
+++ b/lib/librte_eal/common/malloc_mp.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/time.h>
+
+#include <rte_alarm.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_memalloc.h"
+
+#include "malloc_elem.h"
+#include "malloc_mp.h"
+
+#define MP_ACTION_SYNC "mp_malloc_sync"
+/**< request sent by primary process to notify of changes in memory map */
+#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
+/**< request sent by primary process to notify of changes in memory map. this is
+ * essentially a regular sync request, but we cannot send sync requests while
+ * another one is in progress, and we might have to - therefore, we do this as
+ * a separate callback.
+ */
+#define MP_ACTION_REQUEST "mp_malloc_request"
+/**< request sent by secondary process to ask for allocation/deallocation */
+#define MP_ACTION_RESPONSE "mp_malloc_response"
+/**< response sent to secondary process to indicate result of request */
+
+/* forward declarations */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+/* when we're allocating, we need to store some state to ensure that we can
+ * roll back later
+ */
+struct primary_alloc_req_state {
+ struct malloc_heap *heap;
+ struct rte_memseg **ms;
+ int ms_len;
+ struct malloc_elem *elem;
+ void *map_addr;
+ size_t map_len;
+};
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct malloc_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+ struct primary_alloc_req_state alloc_state;
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+/**
+ * General workflow is the following:
+ *
+ * Allocation:
+ * S: send request to primary
+ * P: attempt to allocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * if success, sendmsg success
+ * if failure, roll back allocation and send a rollback request
+ * S: if received msg of success, quit
+ * if received rollback request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * Aside from timeouts, there are three points where we can quit:
+ * - if allocation failed straight away
+ * - if allocation and sync request succeeded
+ * - if allocation succeeded, sync request failed, allocation rolled back and
+ * rollback request received (irrespective of whether it succeeded or failed)
+ *
+ * Deallocation:
+ * S: send request to primary
+ * P: attempt to deallocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * There is no "rollback" from deallocation, as it's safe to have some memory
+ * mapped in some processes - it's absent from the heap, so it won't get used.
+ */
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+/* generate an ID that is not already present in the request list */
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+/* secondary will respond to sync requests thusly */
+static int
+handle_sync(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg reply;
+ const struct malloc_mp_req *req =
+ (const struct malloc_mp_req *)msg->param;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.param;
+ int ret;
+
+ if (req->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+
+ reply.num_fds = 0;
+ strlcpy(reply.name, msg->name, sizeof(reply.name));
+ reply.len_param = sizeof(*resp);
+
+ ret = eal_memalloc_sync_with_primary();
+
+ resp->t = REQ_TYPE_SYNC;
+ resp->id = req->id;
+ resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;
+
+ rte_mp_reply(&reply, peer);
+
+ return 0;
+}
+
+static int
+handle_alloc_request(const struct malloc_mp_req *m,
+ struct mp_request *req)
+{
+ const struct malloc_req_alloc *ar = &m->alloc_req;
+ struct malloc_heap *heap;
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ size_t alloc_sz;
+ int n_segs;
+ void *map_addr;
+
+ alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
+ MALLOC_ELEM_TRAILER_LEN, ar->page_sz);
+ n_segs = alloc_sz / ar->page_sz;
+
+ heap = ar->heap;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
+ goto fail;
+ }
+
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
+ ar->flags, ar->align, ar->bound, ar->contig, ms,
+ n_segs);
+
+ if (elem == NULL)
+ goto fail;
+
+ map_addr = ms[0]->addr;
+
+ /* we have succeeded in allocating memory, but we still need to sync
+ * with other processes. however, since DPDK IPC is single-threaded, we
+ * send an asynchronous request and exit this callback.
+ */
+
+ req->alloc_state.ms = ms;
+ req->alloc_state.ms_len = n_segs;
+ req->alloc_state.map_addr = map_addr;
+ req->alloc_state.map_len = alloc_sz;
+ req->alloc_state.elem = elem;
+ req->alloc_state.heap = heap;
+
+ return 0;
+fail:
+ free(ms);
+ return -1;
+}
+
+/* first stage of primary handling requests from secondary */
+static int
+handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+ int ret;
+
+ /* lock access to request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ /* make sure it's not a dupe */
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ RTE_LOG(ERR, EAL, "Duplicate request id\n");
+ /* entry belongs to the list; don't free it on the failure path */
+ entry = NULL;
+ goto fail;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
+ goto fail;
+ }
+
+ /* erase all data */
+ memset(entry, 0, sizeof(*entry));
+
+ if (m->t == REQ_TYPE_ALLOC) {
+ ret = handle_alloc_request(m, entry);
+ } else if (m->t == REQ_TYPE_FREE) {
+ ret = malloc_heap_free_pages(m->free_req.addr,
+ m->free_req.len);
+ } else {
+ RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
+ goto fail;
+ }
+
+ if (ret != 0) {
+ struct rte_mp_msg resp_msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)resp_msg.param;
+
+ /* send failure message straight away */
+ resp_msg.num_fds = 0;
+ resp_msg.len_param = sizeof(*resp);
+ strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
+ sizeof(resp_msg.name));
+
+ resp->t = m->t;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = m->id;
+
+ if (rte_mp_sendmsg(&resp_msg)) {
+ RTE_LOG(ERR, EAL, "Couldn't send response\n");
+ goto fail;
+ }
+ /* we did not modify the request */
+ free(entry);
+ } else {
+ struct rte_mp_msg sr_msg;
+ struct malloc_mp_req *sr =
+ (struct malloc_mp_req *)sr_msg.param;
+ struct timespec ts;
+
+ memset(&sr_msg, 0, sizeof(sr_msg));
+
+ /* we can do something, so send sync request asynchronously */
+ sr_msg.num_fds = 0;
+ sr_msg.len_param = sizeof(*sr);
+ strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ sr->t = REQ_TYPE_SYNC;
+ sr->id = m->id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&sr_msg, &ts,
+ handle_sync_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
+ if (m->t == REQ_TYPE_ALLOC)
+ free(entry->alloc_state.ms);
+ goto fail;
+ }
+
+ /* mark request as in progress */
+ memcpy(&entry->user_req, m, sizeof(*m));
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ }
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+/* callback for asynchronous sync requests for primary. this will either do a
+ * sendmsg with results, or trigger rollback request.
+ */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ enum malloc_req_result result;
+ struct mp_request *entry;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ int i;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ result = REQ_RESULT_SUCCESS;
+
+ if (reply->nb_received != reply->nb_sent)
+ result = REQ_RESULT_FAIL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->id != entry->user_req.id) {
+ RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->result == REQ_RESULT_FAIL) {
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ }
+
+ if (entry->user_req.t == REQ_TYPE_FREE) {
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* this is a free request, just sendmsg result */
+ resp->t = REQ_TYPE_FREE;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_SUCCESS) {
+ struct malloc_heap *heap = entry->alloc_state.heap;
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ heap->total_size += entry->alloc_state.map_len;
+
+ /* result is success, so just notify secondary about this */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_FAIL) {
+ struct rte_mp_msg rb_msg;
+ struct malloc_mp_req *rb =
+ (struct malloc_mp_req *)rb_msg.param;
+ struct timespec ts;
+ struct primary_alloc_req_state *state =
+ &entry->alloc_state;
+ int ret;
+
+ memset(&rb_msg, 0, sizeof(rb_msg));
+
+ /* we've failed to sync, so do a rollback */
+ rollback_expand_heap(state->ms, state->ms_len, state->elem,
+ state->map_addr, state->map_len);
+
+ /* send rollback request */
+ rb_msg.num_fds = 0;
+ rb_msg.len_param = sizeof(*rb);
+ strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ rb->t = REQ_TYPE_SYNC;
+ rb->id = entry->user_req.id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&rb_msg, &ts,
+ handle_rollback_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");
+
+ /* we couldn't send rollback request, but that's OK -
+ * secondary will time out, and memory has been removed
+ * from heap anyway.
+ */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(state->ms);
+ free(entry);
+ goto fail;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, " to sync request of unknown type\n");
+ goto fail;
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ struct mp_request *entry;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ memset(&msg, 0, sizeof(0));
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ if (entry->user_req.t != REQ_TYPE_ALLOC) {
+ RTE_LOG(ERR, EAL, "Unexpected active request\n");
+ goto fail;
+ }
+
+ /* we don't care if rollback succeeded, request still failed */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = mpreq->id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ /* clean up */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+/* final stage of the request from secondary */
+static int
+handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ /* update request status */
+ entry->user_req.result = m->result;
+
+ entry->state = REQ_STATE_COMPLETE;
+
+ /* trigger thread wakeup */
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
+}
+
+/* synchronously request memory map sync, this is only called whenever primary
+ * process initiates the allocation.
+ */
+int
+request_sync(void)
+{
+ struct rte_mp_msg msg;
+ struct rte_mp_reply reply;
+ struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
+ struct timespec ts;
+ int i, ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&reply, 0, sizeof(reply));
+
+ /* no need to create tailq entries as this is entirely synchronous */
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*req);
+ strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));
+
+ /* sync request carries no data */
+ req->t = REQ_TYPE_SYNC;
+ req->id = get_unique_id();
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_sync(&msg, &reply, &ts);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (reply.nb_received != reply.nb_sent) {
+ RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < reply.nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.msgs[i].param;
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
+ ret = -1;
+ goto out;
+ }
+ if (resp->id != req->id) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ ret = -1;
+ goto out;
+ }
+ if (resp->result != REQ_RESULT_SUCCESS) {
+ RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ free(reply.msgs);
+ return ret;
+}
+
+/* this is a synchronous wrapper around a bunch of asynchronous requests to
+ * primary process. this will initiate a request and wait until responses come.
+ */
+int
+request_to_primary(struct malloc_mp_req *user_req)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
+ goto fail;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto fail;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ /* initialize the request */
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*msg_req);
+ strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));
+
+ /* (attempt to) get a unique id */
+ user_req->id = get_unique_id();
+
+ /* copy contents of user request into the message */
+ memcpy(msg_req, user_req, sizeof(*msg_req));
+
+ if (rte_mp_sendmsg(&msg)) {
+ RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
+ goto fail;
+ }
+
+ /* copy contents of user request into active request */
+ memcpy(&entry->user_req, user_req, sizeof(*user_req));
+
+ /* mark request as in progress */
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ /* finally, wait on timeout */
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "Request timed out\n");
+ ret = -1;
+ } else {
+ ret = 0;
+ user_req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+int
+register_mp_requests(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_REQUEST);
+ return -1;
+ }
+ } else {
+ if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_SYNC);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_ROLLBACK);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_RESPONSE,
+ handle_response)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_RESPONSE);
+ return -1;
+ }
+ }
+ return 0;
+}
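
To connect the workflow comment near the top of this file with the code: on a secondary process, an allocation request is just a populated struct malloc_mp_req handed to request_to_primary(), which blocks until the handshake above finishes or times out. A condensed sketch mirroring try_expand_heap_secondary() in malloc_heap.c (the function name here is hypothetical):

    #include <string.h>
    #include "malloc_mp.h"

    static int
    secondary_alloc_request(struct malloc_heap *heap, uint64_t pg_sz,
            size_t elt_size, int socket, unsigned int flags, size_t align,
            size_t bound, bool contig)
    {
        struct malloc_mp_req req;

        memset(&req, 0, sizeof(req));
        req.t = REQ_TYPE_ALLOC;
        req.alloc_req.heap = heap; /* heap lives in shared memory */
        req.alloc_req.page_sz = pg_sz;
        req.alloc_req.elt_size = elt_size;
        req.alloc_req.socket = socket;
        req.alloc_req.flags = flags;
        req.alloc_req.align = align;
        req.alloc_req.bound = bound;
        req.alloc_req.contig = contig;

        /* blocks until the primary replies or the request times out */
        if (request_to_primary(&req) != 0)
            return -1;
        return req.result == REQ_RESULT_SUCCESS ? 0 : -1;
    }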
diff --git a/lib/librte_eal/common/malloc_mp.h b/lib/librte_eal/common/malloc_mp.h
new file mode 100644
index 00000000..2b86b76f
--- /dev/null
+++ b/lib/librte_eal/common/malloc_mp.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef MALLOC_MP_H
+#define MALLOC_MP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+/* forward declarations */
+struct malloc_heap;
+struct rte_memseg;
+
+/* multiprocess synchronization structures for malloc */
+enum malloc_req_type {
+ REQ_TYPE_ALLOC, /**< ask primary to allocate */
+ REQ_TYPE_FREE, /**< ask primary to free */
+ REQ_TYPE_SYNC /**< ask secondary to synchronize its memory map */
+};
+
+enum malloc_req_result {
+ REQ_RESULT_SUCCESS,
+ REQ_RESULT_FAIL
+};
+
+struct malloc_req_alloc {
+ struct malloc_heap *heap;
+ uint64_t page_sz;
+ size_t elt_size;
+ int socket;
+ unsigned int flags;
+ size_t align;
+ size_t bound;
+ bool contig;
+};
+
+struct malloc_req_free {
+ RTE_STD_C11
+ union {
+ void *addr;
+ uint64_t addr_64;
+ };
+ uint64_t len;
+};
+
+struct malloc_mp_req {
+ enum malloc_req_type t;
+ RTE_STD_C11
+ union {
+ struct malloc_req_alloc alloc_req;
+ struct malloc_req_free free_req;
+ };
+ uint64_t id; /**< not to be populated by caller */
+ enum malloc_req_result result;
+};
+
+int
+register_mp_requests(void);
+
+int
+request_to_primary(struct malloc_mp_req *req);
+
+/* synchronous memory map sync request */
+int
+request_sync(void);
+
+/* functions from malloc_heap exposed here */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len);
+
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs);
+
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len);
+
+#endif /* MALLOC_MP_H */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 82b8910f..56005bea 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,13 +8,16 @@ common_objs = []
common_sources = files(
'eal_common_bus.c',
'eal_common_cpuflags.c',
+ 'eal_common_class.c',
'eal_common_devargs.c',
'eal_common_dev.c',
'eal_common_errno.c',
+ 'eal_common_fbarray.c',
'eal_common_hexdump.c',
'eal_common_launch.c',
'eal_common_lcore.c',
'eal_common_log.c',
+ 'eal_common_memalloc.c',
'eal_common_memory.c',
'eal_common_memzone.c',
'eal_common_options.c',
@@ -23,8 +26,10 @@ common_sources = files(
'eal_common_tailqs.c',
'eal_common_thread.c',
'eal_common_timer.c',
+ 'eal_common_uuid.c',
'malloc_elem.c',
'malloc_heap.c',
+ 'malloc_mp.c',
'rte_keepalive.c',
'rte_malloc.c',
'rte_reciprocal.c',
@@ -43,6 +48,7 @@ common_headers = files(
'include/rte_branch_prediction.h',
'include/rte_bus.h',
'include/rte_bitmap.h',
+ 'include/rte_class.h',
'include/rte_common.h',
'include/rte_debug.h',
'include/rte_devargs.h',
@@ -51,6 +57,7 @@ common_headers = files(
'include/rte_eal_memconfig.h',
'include/rte_eal_interrupts.h',
'include/rte_errno.h',
+ 'include/rte_fbarray.h',
'include/rte_hexdump.h',
'include/rte_interrupts.h',
'include/rte_keepalive.h',
@@ -71,6 +78,7 @@ common_headers = files(
'include/rte_string_fns.h',
'include/rte_tailq.h',
'include/rte_time.h',
+ 'include/rte_uuid.h',
'include/rte_version.h')
# special case install the generic headers, since they go in a subdir
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index e0e0d0b3..b51a6d11 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -29,20 +29,17 @@
void rte_free(void *addr)
{
if (addr == NULL) return;
- if (malloc_elem_free(malloc_elem_from_data(addr)) < 0)
- rte_panic("Fatal error: Invalid memory\n");
+ if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
+ RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
}
/*
* Allocate memory on specified heap.
*/
void *
-rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+rte_malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg)
{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int socket, i;
- void *ret;
-
/* return NULL if size is 0 or alignment is not power-of-2 */
if (size == 0 || (align && !rte_is_power_of_2(align)))
return NULL;
@@ -50,33 +47,12 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
if (!rte_eal_has_hugepages())
socket_arg = SOCKET_ID_ANY;
- if (socket_arg == SOCKET_ID_ANY)
- socket = malloc_get_numa_socket();
- else
- socket = socket_arg;
-
/* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ if (socket_arg >= RTE_MAX_NUMA_NODES)
return NULL;
- ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
- size, 0, align == 0 ? 1 : align, 0);
- if (ret != NULL || socket_arg != SOCKET_ID_ANY)
- return ret;
-
- /* try other heaps */
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- /* we already tried this one */
- if (i == socket)
- continue;
-
- ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
- size, 0, align == 0 ? 1 : align, 0);
- if (ret != NULL)
- return ret;
- }
-
- return NULL;
+ return malloc_heap_alloc(type, size, socket_arg, 0,
+ align == 0 ? 1 : align, 0, false);
}
/*
@@ -134,13 +110,15 @@ rte_realloc(void *ptr, size_t size, unsigned align)
return rte_malloc(NULL, size, align);
struct malloc_elem *elem = malloc_elem_from_data(ptr);
- if (elem == NULL)
- rte_panic("Fatal error: memory corruption detected\n");
+ if (elem == NULL) {
+ RTE_LOG(ERR, EAL, "Error: memory corruption detected\n");
+ return NULL;
+ }
size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align);
/* check alignment matches first, and if ok, see if we can resize block */
if (RTE_PTR_ALIGN(ptr,align) == ptr &&
- malloc_elem_resize(elem, size) == 0)
+ malloc_heap_resize(elem, size) == 0)
return ptr;
/* either alignment is off, or we have no room to expand,
@@ -182,6 +160,23 @@ rte_malloc_get_socket_stats(int socket,
}
/*
+ * Function to dump contents of all heaps
+ */
+void __rte_experimental
+rte_malloc_dump_heaps(FILE *f)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+
+ for (idx = 0; idx < rte_socket_count(); idx++) {
+ unsigned int socket = rte_socket_id_by_idx(idx);
+ fprintf(f, "Heap on socket %i:\n", socket);
+ malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+ }
+
+}
+
+/*
* Print stats on memory type. If type is NULL, info on all types is printed
*/
void
@@ -222,17 +217,21 @@ rte_malloc_set_limit(__rte_unused const char *type,
rte_iova_t
rte_malloc_virt2iova(const void *addr)
{
- rte_iova_t iova;
- const struct malloc_elem *elem = malloc_elem_from_data(addr);
+ const struct rte_memseg *ms;
+ struct malloc_elem *elem = malloc_elem_from_data(addr);
+
if (elem == NULL)
return RTE_BAD_IOVA;
- if (elem->ms->iova == RTE_BAD_IOVA)
- return RTE_BAD_IOVA;
if (rte_eal_iova_mode() == RTE_IOVA_VA)
- iova = (uintptr_t)addr;
- else
- iova = elem->ms->iova +
- RTE_PTR_DIFF(addr, elem->ms->addr);
- return iova;
+ return (uintptr_t) addr;
+
+ ms = rte_mem_virt2memseg(addr, elem->msl);
+ if (ms == NULL)
+ return RTE_BAD_IOVA;
+
+ if (ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
}
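
With rte_malloc now routed through malloc_heap_alloc() and the IOVA lookup going through the memseg database, a quick application-level sanity check looks roughly like the sketch below. This assumes rte_malloc_dump_heaps() is declared (as an experimental symbol) in rte_malloc.h to match the definition above; the checker's name is hypothetical:

    #include <stdio.h>
    #include <rte_malloc.h>
    #include <rte_memory.h> /* RTE_BAD_IOVA, SOCKET_ID_ANY */

    static void
    malloc_sanity_check(void)
    {
        void *p = rte_malloc_socket("example", 4096, 0, SOCKET_ID_ANY);

        if (p != NULL && rte_malloc_virt2iova(p) == RTE_BAD_IOVA)
            printf("buffer is not IOVA-addressable\n");

        rte_malloc_dump_heaps(stdout); /* dump every socket's heap */
        rte_free(p);
    }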
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index be9b5e6d..8767c722 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -52,6 +52,7 @@ struct rte_service_spec_impl {
rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
+ uint8_t active_on_lcore[RTE_MAX_LCORE];
} __rte_cache_aligned;
/* the internal values of a service core */
@@ -61,7 +62,7 @@ struct core_state {
uint8_t runstate; /* running or stopped */
uint8_t is_service_core; /* set if core is currently a service core */
- /* extreme statistics */
+ uint64_t loops;
uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
} __rte_cache_aligned;
@@ -115,7 +116,7 @@ fail_mem:
return -ENOMEM;
}
-void __rte_experimental
+void
rte_service_finalize(void)
{
if (!rte_service_library_initialized)
@@ -161,7 +162,7 @@ service_mt_safe(struct rte_service_spec_impl *s)
return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
}
-int32_t __rte_experimental
+int32_t
rte_service_set_stats_enable(uint32_t id, int32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -175,7 +176,7 @@ rte_service_set_stats_enable(uint32_t id, int32_t enabled)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -189,13 +190,13 @@ rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
return 0;
}
-uint32_t __rte_experimental
+uint32_t
rte_service_get_count(void)
{
return rte_service_count;
}
-int32_t __rte_experimental
+int32_t
rte_service_get_by_name(const char *name, uint32_t *service_id)
{
if (!service_id)
@@ -213,7 +214,7 @@ rte_service_get_by_name(const char *name, uint32_t *service_id)
return -ENODEV;
}
-const char * __rte_experimental
+const char *
rte_service_get_name(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -221,7 +222,7 @@ rte_service_get_name(uint32_t id)
return s->spec.name;
}
-int32_t __rte_experimental
+int32_t
rte_service_probe_capability(uint32_t id, uint32_t capability)
{
struct rte_service_spec_impl *s;
@@ -229,7 +230,7 @@ rte_service_probe_capability(uint32_t id, uint32_t capability)
return !!(s->spec.capabilities & capability);
}
-int32_t __rte_experimental
+int32_t
rte_service_component_register(const struct rte_service_spec *spec,
uint32_t *id_ptr)
{
@@ -262,7 +263,7 @@ rte_service_component_register(const struct rte_service_spec *spec,
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_component_unregister(uint32_t id)
{
uint32_t i;
@@ -283,7 +284,7 @@ rte_service_component_unregister(uint32_t id)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
{
struct rte_service_spec_impl *s;
@@ -298,7 +299,7 @@ rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_runstate_set(uint32_t id, uint32_t runstate)
{
struct rte_service_spec_impl *s;
@@ -313,7 +314,7 @@ rte_service_runstate_set(uint32_t id, uint32_t runstate)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_runstate_get(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
static inline int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
{
if (!service_valid(i))
return -EINVAL;
struct rte_service_spec_impl *s = &rte_services[i];
if (s->comp_runstate != RUNSTATE_RUNNING ||
s->app_runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
+ !(service_mask & (UINT64_C(1) << i))) {
+ s->active_on_lcore[lcore] = 0;
return -ENOEXEC;
+ }
+
+ s->active_on_lcore[lcore] = 1;
/* check do we need cmpset, if MT safe or <= 1 core
* mapped, atomic ops are not required.
@@ -374,7 +379,26 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
return 0;
}
-int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+	struct rte_service_spec_impl *s;
+	int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+	int i;
+
+	if (!service_valid(id))
+		return -EINVAL;
+	s = &rte_services[id];
+
+ for (i = 0; i < lcore_count; i++) {
+ if (s->active_on_lcore[ids[i]])
+ return 1;
+ }
+
+ return 0;
+}
+
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_mt_unsafe)
{
/* run service on calling core, using all-ones as the service mask */
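The per-lcore active flags added above back the new rte_service_may_be_active()
query, which lets an application check whether a stopped service might still be
executing on some service lcore. A minimal quiescence sketch (illustrative, not
part of the patch):

	#include <rte_service.h>
	#include <rte_pause.h>

	static void
	quiesce_service(uint32_t service_id)
	{
		/* stop the service, then wait until no service lcore still
		 * reports it as active.
		 */
		rte_service_runstate_set(service_id, 0);
		while (rte_service_may_be_active(service_id) == 1)
			rte_pause();
	}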
@@ -398,7 +422,7 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
return -EBUSY;
}
- int ret = service_run(id, cs, UINT64_MAX);
+ int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
if (serialize_mt_unsafe)
rte_atomic32_dec(&s->num_mapped_cores);
@@ -419,9 +443,11 @@ rte_service_runner_func(void *arg)
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
/* return value ignored as no change to code flow */
- service_run(i, cs, service_mask);
+ service_run(i, lcore, cs, service_mask);
}
+ cs->loops++;
+
rte_smp_rmb();
}
@@ -430,7 +456,7 @@ rte_service_runner_func(void *arg)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_count(void)
{
int32_t count = 0;
@@ -440,7 +466,7 @@ rte_service_lcore_count(void)
return count;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_list(uint32_t array[], uint32_t n)
{
uint32_t count = rte_service_lcore_count();
@@ -463,7 +489,7 @@ rte_service_lcore_list(uint32_t array[], uint32_t n)
return count;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_count_services(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -476,7 +502,7 @@ rte_service_lcore_count_services(uint32_t lcore)
return __builtin_popcountll(cs->service_mask);
}
-int32_t __rte_experimental
+int32_t
rte_service_start_with_defaults(void)
{
/* create a default mapping from cores to services, then start the
@@ -562,7 +588,7 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -571,7 +597,7 @@ rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
return service_update(&s->spec, lcore, &on, 0);
}
-int32_t __rte_experimental
+int32_t
rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
{
struct rte_service_spec_impl *s;
@@ -597,7 +623,7 @@ set_lcore_state(uint32_t lcore, int32_t state)
lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_reset_all(void)
{
/* loop over cores, reset all to mask 0 */
@@ -617,7 +643,7 @@ rte_service_lcore_reset_all(void)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_add(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -636,7 +662,7 @@ rte_service_lcore_add(uint32_t lcore)
return rte_eal_wait_lcore(lcore);
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_del(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -655,7 +681,7 @@ rte_service_lcore_del(uint32_t lcore)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_start(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -678,7 +704,7 @@ rte_service_lcore_start(uint32_t lcore)
return ret;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_stop(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -708,7 +734,7 @@ rte_service_lcore_stop(uint32_t lcore)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
{
struct rte_service_spec_impl *s;
@@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
}
}
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static void
rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
uint64_t all_cycles, uint32_t reset)
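A minimal sketch of reading the new per-lcore loop counter exposed through
rte_service_lcore_attr_get() (illustrative, not part of the patch; the attribute
id RTE_SERVICE_LCORE_ATTR_LOOPS is the one referenced in the code above):

	#include <stdio.h>
	#include <inttypes.h>
	#include <rte_service.h>

	static void
	print_lcore_loops(uint32_t lcore_id)
	{
		uint64_t loops;

		if (rte_service_lcore_attr_get(lcore_id,
				RTE_SERVICE_LCORE_ATTR_LOOPS, &loops) == 0)
			printf("lcore %u: %" PRIu64 " service loop iterations\n",
				lcore_id, loops);
	}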
@@ -753,7 +801,7 @@ rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
s->cycles_spent, s->cycles_spent / calls);
}
-int32_t __rte_experimental
+int32_t
rte_service_attr_reset_all(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id)
return 0;
}
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
static void
service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
{
@@ -781,7 +846,8 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
fprintf(f, "\n");
}
-int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id)
+int32_t
+rte_service_dump(FILE *f, uint32_t id)
{
uint32_t i;
int print_one = (id != UINT32_MAX);
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index aa52a01e..a0fffa98 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -4,8 +4,6 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
-DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
-DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
DEPDIRS-kni := eal
CFLAGS += -DALLOW_EXPERIMENTAL_API
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 7e5bbe88..fd92c75c 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 6
+LIBABIVER := 8
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -24,23 +24,27 @@ LDLIBS += -ldl
LDLIBS += -lpthread
LDLIBS += -lgcc_s
LDLIBS += -lrt
+LDLIBS += -lrte_kvargs
ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
LDLIBS += -lnuma
endif
# specific to linuxapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_cpuflags.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_dev.c
# from common dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
@@ -48,6 +52,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c
@@ -56,14 +61,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_mp.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c
@@ -81,6 +90,7 @@ CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE
CFLAGS_eal_timer.o := -D_GNU_SOURCE
CFLAGS_eal_lcore.o := -D_GNU_SOURCE
+CFLAGS_eal_memalloc.o := -D_GNU_SOURCE
CFLAGS_eal_thread.o := -D_GNU_SOURCE
CFLAGS_eal_log.o := -D_GNU_SOURCE
CFLAGS_eal_common_log.o := -D_GNU_SOURCE
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf5..e59ac657 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -74,8 +74,8 @@ static int mem_cfg_fd = -1;
static struct flock wr_lock = {
.l_type = F_WRLCK,
.l_whence = SEEK_SET,
- .l_start = offsetof(struct rte_mem_config, memseg),
- .l_len = sizeof(early_mem_config.memseg),
+ .l_start = offsetof(struct rte_mem_config, memsegs),
+ .l_len = sizeof(early_mem_config.memsegs),
};
/* Address of global and public configuration */
@@ -92,20 +92,72 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
-/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+int
+eal_create_runtime_dir(void)
{
- return internal_config.user_mbuf_pool_ops_name;
+ const char *directory = default_runtime_dir;
+ const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+ const char *fallback = "/tmp";
+ char tmp[PATH_MAX];
+ int ret;
+
+ if (getuid() != 0) {
+ /* try XDG path first, fall back to /tmp */
+ if (xdg_runtime_dir != NULL)
+ directory = xdg_runtime_dir;
+ else
+ directory = fallback;
+ }
+ /* create DPDK subdirectory under runtime dir */
+ ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+ if (ret < 0 || ret == sizeof(tmp)) {
+ RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+ return -1;
+ }
+
+ /* create prefix-specific subdirectory under DPDK runtime dir */
+ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+ tmp, internal_config.hugefile_prefix);
+ if (ret < 0 || ret == sizeof(runtime_dir)) {
+ RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+ return -1;
+ }
+
+ /* create the path if it doesn't exist. no "mkdir -p" here, so do it
+ * step by step.
+ */
+ ret = mkdir(tmp, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ tmp, strerror(errno));
+ return -1;
+ }
+
+ ret = mkdir(runtime_dir, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ runtime_dir, strerror(errno));
+ return -1;
+ }
+
+ return 0;
}
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+eal_get_runtime_dir(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
+ return runtime_dir;
+}
+/* Return user provided mbuf pool ops name */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
return internal_config.user_mbuf_pool_ops_name;
}
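With the logic above, the runtime directory resolves to one of the following
(illustrative; "rte" is the default --file-prefix, and the exact path depends on
the invoking user and on the XDG_RUNTIME_DIR environment variable):

	/var/run/dpdk/rte              when running as root
	$XDG_RUNTIME_DIR/dpdk/rte      when unprivileged and XDG_RUNTIME_DIR is set
	/tmp/dpdk/rte                  otherwise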
@@ -282,12 +334,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+	/* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -343,11 +400,14 @@ eal_usage(const char *prgname)
eal_common_usage();
printf("EAL Linux options:\n"
" --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n"
" --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
" --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
+ " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n"
+ " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -370,46 +430,45 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func )
}
static int
-eal_parse_socket_mem(char *socket_mem)
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
char * arg[RTE_MAX_NUMA_NODES];
char *end;
int arg_num, i, len;
uint64_t total_mem = 0;
- len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+ len = strnlen(strval, SOCKET_MEM_STRLEN);
if (len == SOCKET_MEM_STRLEN) {
RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
return -1;
}
/* all other error cases will be caught later */
- if (!isdigit(socket_mem[len-1]))
+ if (!isdigit(strval[len-1]))
return -1;
/* split the optarg into separate socket values */
- arg_num = rte_strsplit(socket_mem, len,
+ arg_num = rte_strsplit(strval, len,
arg, RTE_MAX_NUMA_NODES, ',');
/* if split failed, or 0 arguments */
if (arg_num <= 0)
return -1;
- internal_config.force_sockets = 1;
-
/* parse each defined socket option */
errno = 0;
for (i = 0; i < arg_num; i++) {
+ uint64_t val;
end = NULL;
- internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+ val = strtoull(arg[i], &end, 10);
/* check for invalid input */
if ((errno != 0) ||
(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
return -1;
- internal_config.socket_mem[i] *= 1024ULL;
- internal_config.socket_mem[i] *= 1024ULL;
- total_mem += internal_config.socket_mem[i];
+ val <<= 20;
+ total_mem += val;
+ socket_arg[i] = val;
}
/* check if we have a positive amount of total memory */
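For example, with --socket-mem=1024,2048 the parser above stores per-socket byte
counts by shifting the megabyte values left by 20 bits (illustrative):

	socket_arg[0] = UINT64_C(1024) << 20;	/* 1073741824 bytes, 1 GiB on socket 0 */
	socket_arg[1] = UINT64_C(2048) << 20;	/* 2147483648 bytes, 2 GiB on socket 1 */

The same helper now also serves the new --socket-limit option, writing into
internal_config.socket_limit instead of internal_config.socket_mem, as shown in
the option-handling hunk below.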
@@ -557,13 +616,27 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_SOCKET_MEM_NUM:
- if (eal_parse_socket_mem(optarg) < 0) {
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_mem) < 0) {
RTE_LOG(ERR, EAL, "invalid parameters for --"
OPT_SOCKET_MEM "\n");
eal_usage(prgname);
ret = -1;
goto out;
}
+ internal_config.force_sockets = 1;
+ break;
+
+ case OPT_SOCKET_LIMIT_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_limit) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_LIMIT "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_socket_limits = 1;
break;
case OPT_BASE_VIRTADDR_NUM:
@@ -591,7 +664,8 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name = optarg;
+ internal_config.user_mbuf_pool_ops_name =
+ strdup(optarg);
break;
default:
@@ -613,6 +687,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -638,23 +720,23 @@ out:
return ret;
}
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+ int *socket_id = arg;
+
+ return *socket_id == msl->socket_id;
+}
+
static void
eal_check_mem_on_local_socket(void)
{
- const struct rte_memseg *ms;
- int i, socket_id;
+ int socket_id;
socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
- ms = rte_eal_get_physmem_layout();
-
- for (i = 0; i < RTE_MAX_MEMSEG; i++)
- if (ms[i].socket_id == socket_id &&
- ms[i].len > 0)
- return;
-
- RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
- "memory on local socket!\n");
+ if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
}
static int
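check_socket() above follows the rte_memseg_list_walk() callback convention:
return 0 to keep walking, non-zero to stop and have the walk return that value.
A similar sketch (illustrative, not part of the patch) that counts how many
memseg lists belong to socket 0:

	static int
	count_lists_cb(const struct rte_memseg_list *msl, void *arg)
	{
		unsigned int *count = arg;

		if (msl->socket_id == 0)
			(*count)++;
		return 0; /* keep walking all lists */
	}

	/* usage: unsigned int n = 0; rte_memseg_list_walk(count_lists_cb, &n); */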
@@ -669,6 +751,8 @@ rte_eal_mcfg_complete(void)
/* ALL shared mem_config related INIT DONE */
if (rte_config.process_type == RTE_PROC_PRIMARY)
rte_config.mem_config->magic = RTE_MAGIC;
+
+ internal_config.init_complete = 1;
}
/*
@@ -689,24 +773,8 @@ rte_eal_iopl_init(void)
#ifdef VFIO_PRESENT
static int rte_eal_vfio_setup(void)
{
- int vfio_enabled = 0;
-
if (rte_vfio_enable("vfio"))
return -1;
- vfio_enabled = rte_vfio_is_enabled("vfio");
-
- if (vfio_enabled) {
-
- /* if we are primary process, create a thread to communicate with
- * secondary processes. the thread will use a socket to wait for
- * requests from secondary process to send open file descriptors,
- * because VFIO does not allow multiple open descriptors on a group or
- * VFIO container.
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_mp_sync_setup() < 0)
- return -1;
- }
return 0;
}
@@ -779,6 +847,24 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ rte_config_init();
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0) {
+ rte_eal_init_alert("failed to init mp channel\n");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -798,13 +884,17 @@ rte_eal_init(int argc, char **argv)
"KNI module inserted\n");
}
- if (internal_config.no_hugetlbfs == 0 &&
- internal_config.process_type != RTE_PROC_SECONDARY &&
- eal_hugepage_info_init() < 0) {
- rte_eal_init_alert("Cannot get hugepage information.");
- rte_errno = EACCES;
- rte_atomic32_clear(&run_once);
- return -1;
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
}
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
@@ -825,8 +915,6 @@ rte_eal_init(int argc, char **argv)
rte_srand(rte_rdtsc());
- rte_config_init();
-
if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
rte_eal_init_alert("Cannot init logging.");
rte_errno = ENOMEM;
@@ -834,14 +922,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_mp_channel_init() < 0) {
- rte_eal_init_alert("failed to init mp channel\n");
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- rte_errno = EFAULT;
- return -1;
- }
- }
-
#ifdef VFIO_PRESENT
if (rte_eal_vfio_setup() < 0) {
rte_eal_init_alert("Cannot init VFIO\n");
@@ -850,6 +930,15 @@ rte_eal_init(int argc, char **argv)
return -1;
}
#endif
+ /* in secondary processes, memory init may allocate additional fbarrays
+ * not present in primary processes, so to avoid any potential issues,
+ * initialize memzones first.
+ */
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone\n");
+ rte_errno = ENODEV;
+ return -1;
+ }
if (rte_eal_memory_init() < 0) {
rte_eal_init_alert("Cannot init memory\n");
@@ -860,8 +949,8 @@ rte_eal_init(int argc, char **argv)
/* the directories are locked during eal_hugepage_info_init */
eal_hugedirs_unlock();
- if (rte_eal_memzone_init() < 0) {
- rte_eal_init_alert("Cannot init memzone\n");
+ if (rte_eal_malloc_heap_init() < 0) {
+ rte_eal_init_alert("Cannot init malloc heap\n");
rte_errno = ENODEV;
return -1;
}
@@ -888,17 +977,12 @@ rte_eal_init(int argc, char **argv)
eal_thread_init_master(rte_config.master_lcore);
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
rte_config.master_lcore, (int)thread_id, cpuset,
ret == 0 ? "" : "...");
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -919,7 +1003,7 @@ rte_eal_init(int argc, char **argv)
rte_panic("Cannot create thread\n");
/* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ snprintf(thread_name, sizeof(thread_name),
"lcore-slave-%d", i);
ret = rte_thread_setname(lcore_config[i].thread_id,
thread_name);
@@ -950,6 +1034,12 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+#ifdef VFIO_PRESENT
+ /* Register mp action after probe() so that we got enough info */
+ if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
+ return -1;
+#endif
+
/* initialize default service/lcore mappings and start running. Ignore
* -ENOTSUP, as it indicates no service coremask passed to EAL.
*/
@@ -964,9 +1054,26 @@ rte_eal_init(int argc, char **argv)
return fctret;
}
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg __rte_unused)
+{
+ /* ms is const, so find this memseg */
+ struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+
+ found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+ return 0;
+}
+
int __rte_experimental
rte_eal_cleanup(void)
{
+ /* if we're in a primary process, we need to mark hugepages as freeable
+ * so that finalization can release them back to the system.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
return 0;
}
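mark_freeable() above also shows the rte_memseg_walk() callback signature. A
similar sketch (illustrative, not part of the patch) that totals the size of all
mapped segments:

	static int
	sum_seg_len(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)
	{
		size_t *total = arg;

		*total += ms->len;
		return 0; /* continue the walk */
	}

	/* usage: size_t total = 0; rte_memseg_walk(sum_seg_len, &total); */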
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index c115e823..391d2a65 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -19,7 +19,6 @@
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_errno.h>
-#include <rte_malloc.h>
#include <rte_spinlock.h>
#include <eal_private.h>
@@ -91,7 +90,7 @@ eal_alarm_callback(void *arg __rte_unused)
rte_spinlock_lock(&alarm_list_lk);
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
}
if (!LIST_EMPTY(&alarm_list)) {
@@ -122,7 +121,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
return -EINVAL;
- new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
+ new_alarm = calloc(1, sizeof(*new_alarm));
if (new_alarm == NULL)
return -ENOMEM;
@@ -196,7 +195,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
} else {
/* If calling from other context, mark that alarm is executing
@@ -220,7 +219,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
ap = ap_prev;
} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
diff --git a/lib/librte_eal/linuxapp/eal/eal_cpuflags.c b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c
new file mode 100644
index 00000000..d38296e1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Red Hat, Inc.
+ */
+
+#include <elf.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 16)
+#include <sys/auxv.h>
+#define HAS_AUXV 1
+#endif
+#endif
+
+#include <rte_cpuflags.h>
+
+#ifndef HAS_AUXV
+static unsigned long
+getauxval(unsigned long type __rte_unused)
+{
+ errno = ENOTSUP;
+ return 0;
+}
+#endif
+
+#ifdef RTE_ARCH_64
+typedef Elf64_auxv_t Internal_Elfx_auxv_t;
+#else
+typedef Elf32_auxv_t Internal_Elfx_auxv_t;
+#endif
+
+/**
+ * Provides a method for retrieving values from the auxiliary vector and
+ * possibly running a string comparison.
+ *
+ * @return Always returns a result. When the result is 0, check errno
+ * to see if an error occurred during processing.
+ */
+static unsigned long
+_rte_cpu_getauxval(unsigned long type, const char *str)
+{
+ unsigned long val;
+
+ errno = 0;
+ val = getauxval(type);
+
+ if (!val && (errno == ENOTSUP || errno == ENOENT)) {
+ int auxv_fd = open("/proc/self/auxv", O_RDONLY);
+ Internal_Elfx_auxv_t auxv;
+
+ if (auxv_fd == -1)
+ return 0;
+
+ errno = ENOENT;
+ while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
+ if (auxv.a_type == type) {
+ errno = 0;
+ val = auxv.a_un.a_val;
+ if (str)
+ val = strcmp((const char *)val, str);
+ break;
+ }
+ }
+ close(auxv_fd);
+ }
+
+ return val;
+}
+
+unsigned long
+rte_cpu_getauxval(unsigned long type)
+{
+ return _rte_cpu_getauxval(type, NULL);
+}
+
+int
+rte_cpu_strcmp_auxval(unsigned long type, const char *str)
+{
+ return _rte_cpu_getauxval(type, str);
+}
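A minimal sketch of how the new helpers can be used (illustrative, not part of
the patch; AT_HWCAP and AT_PLATFORM come from <elf.h>, and "v7l" is just an
example platform string):

	#include <elf.h>
	#include <errno.h>
	#include <rte_cpuflags.h>

	static void
	probe_cpu(void)
	{
		/* raw hardware capability bits from the auxiliary vector */
		unsigned long hwcap = rte_cpu_getauxval(AT_HWCAP);

		/* 0 means "strings match", but can also mean "not found";
		 * check errno in that case, as the comment above explains.
		 */
		int is_v7 = (rte_cpu_strcmp_auxval(AT_PLATFORM, "v7l") == 0);

		(void)hwcap;
		(void)is_v7;
	}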
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
new file mode 100644
index 00000000..1cf6aebf
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+
+#include <rte_string_fns.h>
+#include <rte_log.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+#include <rte_malloc.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+
+#include "eal_private.h"
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static bool monitor_started;
+
+#define EAL_UEV_MSG_LEN 4096
+#define EAL_UEV_MSG_ELEM_LEN 128
+
+static void dev_uev_handler(__rte_unused void *param);
+
+/* identify the system layer which reports this event. */
+enum eal_dev_event_subsystem {
+ EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
+ EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
+ EAL_DEV_EVENT_SUBSYSTEM_VFIO, /* VFIO driver device event */
+ EAL_DEV_EVENT_SUBSYSTEM_MAX
+};
+
+static int
+dev_uev_socket_fd_create(void)
+{
+ struct sockaddr_nl addr;
+ int ret;
+
+ intr_handle.fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
+ SOCK_NONBLOCK,
+ NETLINK_KOBJECT_UEVENT);
+ if (intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "create uevent fd failed.\n");
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.nl_family = AF_NETLINK;
+ addr.nl_pid = 0;
+ addr.nl_groups = 0xffffffff;
+
+ ret = bind(intr_handle.fd, (struct sockaddr *) &addr, sizeof(addr));
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n");
+ goto err;
+ }
+
+ return 0;
+err:
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+ return ret;
+}
+
+static int
+dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
+{
+ char action[EAL_UEV_MSG_ELEM_LEN];
+ char subsystem[EAL_UEV_MSG_ELEM_LEN];
+ char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
+ int i = 0;
+
+ memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
+ memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
+ memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
+
+ while (i < length) {
+ for (; i < length; i++) {
+ if (*buf)
+ break;
+ buf++;
+ }
+ /**
+ * check device uevent from kernel side, no need to check
+ * uevent from udev.
+ */
+ if (!strncmp(buf, "libudev", 7)) {
+ buf += 7;
+ i += 7;
+ return -1;
+ }
+ if (!strncmp(buf, "ACTION=", 7)) {
+ buf += 7;
+ i += 7;
+ strlcpy(action, buf, sizeof(action));
+ } else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
+ buf += 10;
+ i += 10;
+ strlcpy(subsystem, buf, sizeof(subsystem));
+ } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
+ buf += 14;
+ i += 14;
+			strlcpy(pci_slot_name, buf, sizeof(pci_slot_name));
+ event->devname = strdup(pci_slot_name);
+ }
+ for (; i < length; i++) {
+ if (*buf == '\0')
+ break;
+ buf++;
+ }
+ }
+
+ /* parse the subsystem layer */
+ if (!strncmp(subsystem, "uio", 3))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
+ else if (!strncmp(subsystem, "pci", 3))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
+ else if (!strncmp(subsystem, "vfio", 4))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
+ else
+ return -1;
+
+ /* parse the action type */
+ if (!strncmp(action, "add", 3))
+ event->type = RTE_DEV_EVENT_ADD;
+ else if (!strncmp(action, "remove", 6))
+ event->type = RTE_DEV_EVENT_REMOVE;
+ else
+ return -1;
+ return 0;
+}
+
+static void
+dev_delayed_unregister(void *param)
+{
+ rte_intr_callback_unregister(&intr_handle, dev_uev_handler, param);
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+}
+
+static void
+dev_uev_handler(__rte_unused void *param)
+{
+ struct rte_dev_event uevent;
+ int ret;
+ char buf[EAL_UEV_MSG_LEN];
+
+ memset(&uevent, 0, sizeof(struct rte_dev_event));
+ memset(buf, 0, EAL_UEV_MSG_LEN);
+
+ ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
+ if (ret < 0 && errno == EAGAIN)
+ return;
+ else if (ret <= 0) {
+		/* connection is closed or broken, cannot be brought up again. */
+ RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n");
+ rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
+ return;
+ }
+
+ ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
+ if (ret < 0) {
+		RTE_LOG(DEBUG, EAL, "Not a valid event "
+			"that needs to be handled.\n");
+ return;
+ }
+
+ RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
+ uevent.devname, uevent.type, uevent.subsystem);
+
+ if (uevent.devname)
+ dev_callback_process(uevent.devname, uevent.type);
+}
+
+int __rte_experimental
+rte_dev_event_monitor_start(void)
+{
+ int ret;
+
+ if (monitor_started)
+ return 0;
+
+ ret = dev_uev_socket_fd_create();
+ if (ret) {
+		RTE_LOG(ERR, EAL, "failed to create device event fd.\n");
+ return -1;
+ }
+
+ intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
+ ret = rte_intr_callback_register(&intr_handle, dev_uev_handler, NULL);
+
+ if (ret) {
+		RTE_LOG(ERR, EAL, "failed to register uevent callback.\n");
+ return -1;
+ }
+
+ monitor_started = true;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_dev_event_monitor_stop(void)
+{
+ int ret;
+
+ if (!monitor_started)
+ return 0;
+
+ ret = rte_intr_callback_unregister(&intr_handle, dev_uev_handler,
+ (void *)-1);
+ if (ret < 0) {
+		RTE_LOG(ERR, EAL, "failed to unregister uevent callback.\n");
+ return ret;
+ }
+
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+ monitor_started = false;
+ return 0;
+}
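A minimal sketch of how an application could consume these events (illustrative,
not part of the patch; it assumes the rte_dev_event_callback_register() API from
rte_dev.h, whose callback receives the device name and event type):

	#include <stdio.h>
	#include <rte_common.h>
	#include <rte_dev.h>

	static void
	on_dev_event(const char *device_name, enum rte_dev_event_type type,
		void *cb_arg __rte_unused)
	{
		if (type == RTE_DEV_EVENT_ADD)
			printf("device %s added\n", device_name);
		else if (type == RTE_DEV_EVENT_REMOVE)
			printf("device %s removed\n", device_name);
	}

	/* usage (assumed API, see lead-in):
	 *	rte_dev_event_callback_register(NULL, on_dev_event, NULL);
	 *	rte_dev_event_monitor_start();
	 */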
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 8bbf771a..3a7d4b22 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -14,7 +14,11 @@
#include <stdarg.h>
#include <unistd.h>
#include <errno.h>
+#include <sys/mman.h>
#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <linux/mman.h> /* for hugetlb-related flags */
#include <rte_memory.h>
#include <rte_eal.h>
@@ -30,6 +34,40 @@
#include "eal_filesystem.h"
static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
+static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node";
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
+{
+ void *retval;
+ int fd = open(filename, flags, 0666);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
/* this function is only called from eal_hugepage_info_init which itself
* is only called from a primary process */
@@ -70,6 +108,45 @@ get_num_hugepages(const char *subdir)
return num_pages;
}
+static uint32_t
+get_num_hugepages_on_node(const char *subdir, unsigned int socket)
+{
+ char path[PATH_MAX], socketpath[PATH_MAX];
+ DIR *socketdir;
+ unsigned long num_pages = 0;
+ const char *nr_hp_file = "free_hugepages";
+
+ snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages",
+ sys_pages_numa_dir_path, socket);
+
+ socketdir = opendir(socketpath);
+ if (socketdir) {
+ /* Keep calm and carry on */
+ closedir(socketdir);
+ } else {
+ /* Can't find socket dir, so ignore it */
+ return 0;
+ }
+
+ snprintf(path, sizeof(path), "%s/%s/%s",
+ socketpath, subdir, nr_hp_file);
+ if (eal_parse_sysfs_value(path, &num_pages) < 0)
+ return 0;
+
+ if (num_pages == 0)
+ RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n",
+ subdir);
+
+ /*
+	 * we want to return a uint32_t, and anything more than that looks
+	 * suspicious anyway ...
+ */
+ if (num_pages > UINT32_MAX)
+ num_pages = UINT32_MAX;
+
+ return num_pages;
+}
+
static uint64_t
get_default_hp_size(void)
{
@@ -94,8 +171,8 @@ get_default_hp_size(void)
return size;
}
-static const char *
-get_hugepage_dir(uint64_t hugepage_sz)
+static int
+get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int len)
{
enum proc_mount_fieldnames {
DEVICE = 0,
@@ -113,7 +190,7 @@ get_hugepage_dir(uint64_t hugepage_sz)
const char split_tok = ' ';
char *splitstr[_FIELDNAME_MAX];
char buf[BUFSIZ];
- char *retval = NULL;
+ int retval = -1;
FILE *fd = fopen(proc_mounts, "r");
if (fd == NULL)
@@ -140,7 +217,8 @@ get_hugepage_dir(uint64_t hugepage_sz)
/* if no explicit page size, the default page size is compared */
if (pagesz_str == NULL){
if (hugepage_sz == default_size){
- retval = strdup(splitstr[MOUNTPT]);
+ strlcpy(hugedir, splitstr[MOUNTPT], len);
+ retval = 0;
break;
}
}
@@ -148,7 +226,8 @@ get_hugepage_dir(uint64_t hugepage_sz)
else {
uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
if (pagesz == hugepage_sz) {
- retval = strdup(splitstr[MOUNTPT]);
+ strlcpy(hugedir, splitstr[MOUNTPT], len);
+ retval = 0;
break;
}
}
@@ -207,11 +286,9 @@ clear_hugedir(const char * hugedir)
/* non-blocking lock */
lck_result = flock(fd, LOCK_EX | LOCK_NB);
- /* if lock succeeds, unlock and remove the file */
- if (lck_result != -1) {
- flock(fd, LOCK_UN);
+ /* if lock succeeds, remove the file */
+ if (lck_result != -1)
unlinkat(dir_fd, dirent->d_name, 0);
- }
close (fd);
dirent = readdir(dir);
}
@@ -238,17 +315,49 @@ compare_hpi(const void *a, const void *b)
return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
}
-/*
- * when we initialize the hugepage info, everything goes
- * to socket 0 by default. it will later get sorted by memory
- * initialization procedure.
- */
-int
-eal_hugepage_info_init(void)
+static void
+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
{
- const char dirent_start_text[] = "hugepages-";
+ uint64_t total_pages = 0;
+ unsigned int i;
+
+ /*
+ * first, try to put all hugepages into relevant sockets, but
+	 * if the first attempt fails, fall back to collecting all pages
+ * in one socket and sorting them later
+ */
+ total_pages = 0;
+ /* we also don't want to do this for legacy init */
+ if (!internal_config.legacy_mem)
+ for (i = 0; i < rte_socket_count(); i++) {
+ int socket = rte_socket_id_by_idx(i);
+ unsigned int num_pages =
+ get_num_hugepages_on_node(
+ dirent->d_name, socket);
+ hpi->num_pages[socket] = num_pages;
+ total_pages += num_pages;
+ }
+ /*
+	 * we failed to sort memory from the get-go, so fall
+	 * back to the old way
+ */
+ if (total_pages == 0) {
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit number of hugepages to
+ * 1GB per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+ }
+}
+
+static int
+hugepage_info_init(void)
+{
+	const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
- unsigned i, num_sizes = 0;
+ unsigned int i, num_sizes = 0;
DIR *dir;
struct dirent *dirent;
@@ -273,10 +382,10 @@ eal_hugepage_info_init(void)
hpi = &internal_config.hugepage_info[num_sizes];
hpi->hugepage_sz =
rte_str_to_size(&dirent->d_name[dirent_start_len]);
- hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
/* first, check if we have a mountpoint */
- if (hpi->hugedir == NULL) {
+ if (get_hugepage_dir(hpi->hugepage_sz,
+ hpi->hugedir, sizeof(hpi->hugedir)) < 0) {
uint32_t num_pages;
num_pages = get_num_hugepages(dirent->d_name);
@@ -286,6 +395,22 @@ eal_hugepage_info_init(void)
"%" PRIu64 " reserved, but no mounted "
"hugetlbfs found for that size\n",
num_pages, hpi->hugepage_sz);
+ /* if we have kernel support for reserving hugepages
+ * through mmap, and we're in in-memory mode, treat this
+ * page size as valid. we cannot be in legacy mode at
+ * this point because we've checked this earlier in the
+ * init process.
+ */
+#ifdef MAP_HUGE_SHIFT
+ if (internal_config.in_memory) {
+ RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
+ "hugepages of size %" PRIu64 " bytes "
+ "will be allocated anonymously\n",
+ hpi->hugepage_sz);
+ calc_num_pages(hpi, dirent);
+ num_sizes++;
+ }
+#endif
continue;
}
@@ -302,16 +427,7 @@ eal_hugepage_info_init(void)
if (clear_hugedir(hpi->hugedir) == -1)
break;
- /* for now, put all pages into socket 0,
- * later they will be sorted */
- hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
-
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, limit number of hugepages to
- * 1GB per page size */
- hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
- RTE_PGSIZE_1G / hpi->hugepage_sz);
-#endif
+ calc_num_pages(hpi, dirent);
num_sizes++;
}
@@ -328,11 +444,82 @@ eal_hugepage_info_init(void)
sizeof(internal_config.hugepage_info[0]), compare_hpi);
/* now we have all info, check we have at least one valid size */
- for (i = 0; i < num_sizes; i++)
- if (internal_config.hugepage_info[i].hugedir != NULL &&
- internal_config.hugepage_info[i].num_pages[0] > 0)
+ for (i = 0; i < num_sizes; i++) {
+ /* pages may no longer all be on socket 0, so check all */
+ unsigned int j, num_pages = 0;
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+ num_pages += hpi->num_pages[j];
+ if (num_pages > 0)
return 0;
+ }
/* no valid hugepage mounts available, return error */
return -1;
}
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+ struct hugepage_info *hpi, *tmp_hpi;
+ unsigned int i;
+
+ if (hugepage_info_init() < 0)
+ return -1;
+
+ /* for no shared files mode, we're done */
+ if (internal_config.no_shconf)
+ return 0;
+
+ hpi = &internal_config.hugepage_info[0];
+
+ tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ return -1;
+ }
+
+ memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+ /* we've copied file descriptors along with everything else, but they
+ * will be invalid in secondary process, so overwrite them
+ */
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ struct hugepage_info *tmp = &tmp_hpi[i];
+ tmp->lock_descriptor = -1;
+ }
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
+
+int eal_hugepage_info_read(void)
+{
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+
+ tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+ return -1;
+ }
+
+ memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
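For reference, the per-node counts used above come from sysfs; for 2 MB pages on
NUMA node 0 the path built by get_num_hugepages_on_node() resolves to something
like (illustrative):

	/sys/devices/system/node/node0/hugepages/hugepages-2048kB/free_hugepages

where "hugepages-2048kB" is the subdir name discovered under
/sys/kernel/mm/hugepages. The shared copy of internal_config.hugepage_info
written by eal_hugepage_info_init() is re-read by secondary processes through
eal_hugepage_info_read(), both using the path returned by
eal_hugepage_info_path().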
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index f86f22f7..4076c6d6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -30,7 +30,6 @@
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
-#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
@@ -405,8 +404,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
}
/* allocate a new interrupt callback entity */
- callback = rte_zmalloc("interrupt callback list",
- sizeof(*callback), 0);
+ callback = calloc(1, sizeof(*callback));
if (callback == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
return -ENOMEM;
@@ -420,7 +418,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
TAILQ_FOREACH(src, &intr_sources, next) {
if (src->intr_handle.fd == intr_handle->fd) {
/* we had no interrupts for this */
- if TAILQ_EMPTY(&src->callbacks)
+ if (TAILQ_EMPTY(&src->callbacks))
wake_thread = 1;
TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
@@ -431,10 +429,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
/* no existing callbacks for this - add new source */
if (src == NULL) {
- if ((src = rte_zmalloc("interrupt source list",
- sizeof(*src), 0)) == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
- rte_free(callback);
+ free(callback);
ret = -ENOMEM;
} else {
src->intr_handle = *intr_handle;
@@ -501,7 +499,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
cb->cb_arg == cb_arg)) {
TAILQ_REMOVE(&src->callbacks, cb, next);
- rte_free(cb);
+ free(cb);
ret++;
}
}
@@ -509,7 +507,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
/* all callbacks for that source are removed. */
if (TAILQ_EMPTY(&src->callbacks)) {
TAILQ_REMOVE(&intr_sources, src, next);
- rte_free(src);
+ free(src);
}
}
@@ -559,6 +557,9 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle)
return -1;
break;
#endif
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
/* unknown handle type */
default:
RTE_LOG(ERR, EAL,
@@ -606,6 +607,9 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle)
return -1;
break;
#endif
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
/* unknown handle type */
default:
RTE_LOG(ERR, EAL,
@@ -674,7 +678,10 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
bytes_read = 0;
call = true;
break;
-
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
default:
bytes_read = 1;
break;
@@ -844,8 +851,7 @@ eal_intr_thread_main(__rte_unused void *arg)
int
rte_eal_intr_init(void)
{
- int ret = 0, ret_1 = 0;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ int ret = 0;
/* init the global interrupt source head */
TAILQ_INIT(&intr_sources);
@@ -860,23 +866,15 @@ rte_eal_intr_init(void)
}
/* create the host thread to wait/handle the interrupt */
- ret = pthread_create(&intr_thread, NULL,
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
eal_intr_thread_main, NULL);
if (ret != 0) {
- rte_errno = ret;
+ rte_errno = -ret;
RTE_LOG(ERR, EAL,
"Failed to create thread for interrupt handling\n");
- } else {
- /* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
- "eal-intr-thread");
- ret_1 = rte_thread_setname(intr_thread, thread_name);
- if (ret_1 != 0)
- RTE_LOG(DEBUG, EAL,
- "Failed to set thread name for interrupt handling\n");
}
- return -ret;
+ return ret;
}
static void
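The interrupt thread is now created through rte_ctrl_thread_create(), which
creates the thread and sets its name in one call and returns a negative errno
value on failure (hence the sign change above). A minimal sketch of the same
helper used for an application control thread (illustrative, not part of the
patch; it assumes the declaration is available via rte_lcore.h):

	#include <pthread.h>
	#include <rte_common.h>
	#include <rte_lcore.h>

	static void *
	housekeeping(void *arg __rte_unused)
	{
		/* periodic non-datapath work goes here */
		return NULL;
	}

	/* usage:
	 *	pthread_t tid;
	 *	int ret = rte_ctrl_thread_create(&tid, "app-ctrl", NULL,
	 *			housekeeping, NULL);
	 *	ret is 0 on success, a negative errno value otherwise
	 */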
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index ff145884..9d02dddb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -25,25 +25,14 @@
static ssize_t
console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
{
- char copybuf[BUFSIZ + 1];
ssize_t ret;
- uint32_t loglevel;
/* write on stdout */
ret = fwrite(buf, 1, size, stdout);
fflush(stdout);
- /* truncate message if too big (should not happen) */
- if (size > BUFSIZ)
- size = BUFSIZ;
-
/* Syslog error levels are from 0 to 7, so subtract 1 to convert */
- loglevel = rte_log_cur_msg_loglevel() - 1;
- memcpy(copybuf, buf, size);
- copybuf[size] = '\0';
-
- /* write on syslog too */
- syslog(loglevel, "%s", copybuf);
+ syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf);
return ret;
}
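The syslog() call above now bounds the message with a printf precision instead
of copying it into a fixed buffer. The idiom, for reference, prints exactly
'size' bytes and does not require 'buf' to be NUL-terminated:

	printf("%.*s", (int)size, buf);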
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
new file mode 100644
index 00000000..aa95551a
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -0,0 +1,1363 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#define _FILE_OFFSET_BITS 64
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <setjmp.h>
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+#include <numa.h>
+#include <numaif.h>
+#endif
+#include <linux/falloc.h>
+#include <linux/mman.h> /* for hugetlb-related mmap flags */
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_eal_memconfig.h>
+#include <rte_eal.h>
+#include <rte_memory.h>
+#include <rte_spinlock.h>
+
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_private.h"
+
+const int anonymous_hugepages_supported =
+#ifdef MAP_HUGE_SHIFT
+ 1;
+#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT
+#else
+ 0;
+#define RTE_MAP_HUGE_SHIFT 26
+#endif
+
+/*
+ * not all kernel versions support fallocate on hugetlbfs, so fall back to
+ * ftruncate and disallow deallocation if fallocate is not supported.
+ */
+static int fallocate_supported = -1; /* unknown */
+
+/* for single-file segments, we need some kind of mechanism to keep track of
+ * which hugepages can be freed back to the system, and which cannot. we cannot
+ * use flock() because they don't allow locking parts of a file, and we cannot
+ * use fcntl() due to issues with their semantics, so we will have to rely on a
+ * bunch of lockfiles for each page.
+ *
+ * we cannot know how many pages a system will have in advance, but we do know
+ * that they come in lists, and we know lengths of these lists. so, simply store
+ * a malloc'd array of fd's indexed by list and segment index.
+ *
+ * they will be initialized at startup, and filled as we allocate/deallocate
+ * segments. also, use this to track memseg list proper fd.
+ */
+static struct {
+ int *fds; /**< dynamically allocated array of segment lock fd's */
+ int memseg_list_fd; /**< memseg list fd */
+ int len; /**< total length of the array */
+ int count; /**< entries used in an array */
+} lock_fds[RTE_MAX_MEMSEG_LISTS];
+
+/** local copy of a memory map, used to synchronize memory hotplug in MP */
+static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS];
+
+static sigjmp_buf huge_jmpenv;
+
+static void __rte_unused huge_sigbus_handler(int signo __rte_unused)
+{
+ siglongjmp(huge_jmpenv, 1);
+}
+
+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,
+ * non-static local variable in the stack frame calling sigsetjmp might be
+ * clobbered by a call to longjmp.
+ */
+static int __rte_unused huge_wrap_sigsetjmp(void)
+{
+ return sigsetjmp(huge_jmpenv, 1);
+}
+
+static struct sigaction huge_action_old;
+static int huge_need_recover;
+
+static void __rte_unused
+huge_register_sigbus(void)
+{
+ sigset_t mask;
+ struct sigaction action;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ action.sa_flags = 0;
+ action.sa_mask = mask;
+ action.sa_handler = huge_sigbus_handler;
+
+ huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
+}
+
+static void __rte_unused
+huge_recover_sigbus(void)
+{
+ if (huge_need_recover) {
+ sigaction(SIGBUS, &huge_action_old, NULL);
+ huge_need_recover = 0;
+ }
+}
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+static bool
+check_numa(void)
+{
+ bool ret = true;
+ /* Check if kernel supports NUMA. */
+ if (numa_available() != 0) {
+ RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
+ ret = false;
+ }
+ return ret;
+}
+
+static void
+prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)
+{
+ RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
+ if (get_mempolicy(oldpolicy, oldmask->maskp,
+ oldmask->size + 1, 0, 0) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to get current mempolicy: %s. "
+ "Assuming MPOL_DEFAULT.\n", strerror(errno));
+		*oldpolicy = MPOL_DEFAULT;
+ }
+ RTE_LOG(DEBUG, EAL,
+ "Setting policy MPOL_PREFERRED for socket %d\n",
+ socket_id);
+ numa_set_preferred(socket_id);
+}
+
+static void
+restore_numa(int *oldpolicy, struct bitmask *oldmask)
+{
+ RTE_LOG(DEBUG, EAL,
+ "Restoring previous memory policy: %d\n", *oldpolicy);
+ if (*oldpolicy == MPOL_DEFAULT) {
+ numa_set_localalloc();
+ } else if (set_mempolicy(*oldpolicy, oldmask->maskp,
+ oldmask->size + 1) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
+ strerror(errno));
+ numa_set_localalloc();
+ }
+ numa_free_cpumask(oldmask);
+}
+#endif
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+get_file_size(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */
+static int lock(int fd, int type)
+{
+ int ret;
+
+ /* flock may be interrupted */
+ do {
+ ret = flock(fd, type | LOCK_NB);
+ } while (ret && errno == EINTR);
+
+ if (ret && errno == EWOULDBLOCK) {
+ /* couldn't lock */
+ return 0;
+ } else if (ret) {
+ RTE_LOG(ERR, EAL, "%s(): error calling flock(): %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ /* lock was successful */
+ return 1;
+}
+
+static int get_segment_lock_fd(int list_idx, int seg_idx)
+{
+ char path[PATH_MAX] = {0};
+ int fd;
+
+ if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds))
+ return -1;
+ if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len)
+ return -1;
+
+ fd = lock_fds[list_idx].fds[seg_idx];
+ /* does this lock already exist? */
+ if (fd >= 0)
+ return fd;
+
+ eal_get_hugefile_lock_path(path, sizeof(path),
+ list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+
+ fd = open(path, O_CREAT | O_RDWR, 0660);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error creating lockfile '%s': %s\n",
+ __func__, path, strerror(errno));
+ return -1;
+ }
+ /* take out a read lock */
+ if (lock(fd, LOCK_SH) != 1) {
+ RTE_LOG(ERR, EAL, "%s(): failed to take out a readlock on '%s': %s\n",
+ __func__, path, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ /* store it for future reference */
+ lock_fds[list_idx].fds[seg_idx] = fd;
+ lock_fds[list_idx].count++;
+ return fd;
+}
+
+static int unlock_segment(int list_idx, int seg_idx)
+{
+ int fd, ret;
+
+ if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds))
+ return -1;
+ if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len)
+ return -1;
+
+ fd = lock_fds[list_idx].fds[seg_idx];
+
+ /* upgrade lock to exclusive to see if we can remove the lockfile */
+ ret = lock(fd, LOCK_EX);
+ if (ret == 1) {
+ /* we've succeeded in taking exclusive lock, this lockfile may
+ * be removed.
+ */
+ char path[PATH_MAX] = {0};
+ eal_get_hugefile_lock_path(path, sizeof(path),
+ list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+ if (unlink(path)) {
+ RTE_LOG(ERR, EAL, "%s(): error removing lockfile '%s': %s\n",
+ __func__, path, strerror(errno));
+ }
+ }
+ /* we don't want to leak the fd, so even if we fail to lock, close fd
+ * and remove it from list anyway.
+ */
+ close(fd);
+ lock_fds[list_idx].fds[seg_idx] = -1;
+ lock_fds[list_idx].count--;
+
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+
+static int
+get_seg_fd(char *path, int buflen, struct hugepage_info *hi,
+ unsigned int list_idx, unsigned int seg_idx)
+{
+ int fd;
+
+ if (internal_config.single_file_segments) {
+ /* create a hugepage file path */
+ eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx);
+
+ fd = lock_fds[list_idx].memseg_list_fd;
+
+ if (fd < 0) {
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ /* take out a read lock and keep it indefinitely */
+ if (lock(fd, LOCK_SH) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ lock_fds[list_idx].memseg_list_fd = fd;
+ }
+ } else {
+ /* create a hugepage file path */
+ eal_get_hugefile_path(path, buflen, hi->hugedir,
+ list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ /* take out a read lock */
+ if (lock(fd, LOCK_SH) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ }
+ return fd;
+}
+
+static int
+resize_hugefile(int fd, char *path, int list_idx, int seg_idx,
+ uint64_t fa_offset, uint64_t page_sz, bool grow)
+{
+ bool again = false;
+ do {
+ if (fallocate_supported == 0) {
+ /* we cannot deallocate memory if fallocate() is not
+ * supported, and hugepage file is already locked at
+ * creation, so no further synchronization needed.
+ */
+
+ if (!grow) {
+ RTE_LOG(DEBUG, EAL, "%s(): fallocate not supported, not freeing page back to the system\n",
+ __func__);
+ return -1;
+ }
+ uint64_t new_size = fa_offset + page_sz;
+ uint64_t cur_size = get_file_size(fd);
+
+ /* fallocate isn't supported, fall back to ftruncate */
+ if (new_size > cur_size &&
+ ftruncate(fd, new_size) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ } else {
+ int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_KEEP_SIZE;
+ int ret, lock_fd;
+
+ /* if fallocate() is supported, we need to take out a
+ * read lock on allocation (to prevent other processes
+ * from deallocating this page), and a write lock on
+ * deallocation (to ensure nobody else is using this
+ * page).
+ *
+ * read locks on the page itself are already taken out
+ * at file creation, in get_seg_fd().
+ *
+ * we cannot rely on a simple flock() call, because we
+ * need to lock a section of the file, and fcntl() locks
+ * have numerous problems with their semantics, so we
+ * use deterministically named lock files for each
+ * section of the file.
+ *
+ * if we're shrinking the file, we want to upgrade our
+ * lock from shared to exclusive.
+ *
+ * lock_fd is an fd for a lockfile, not for the segment
+ * list.
+ */
+ lock_fd = get_segment_lock_fd(list_idx, seg_idx);
+
+ if (!grow) {
+ /* we are using this lockfile to determine
+ * whether this particular page is locked, as we
+ * are in single file segments mode and thus
+ * cannot use regular flock() to get this info.
+ *
+ * we want to try and take out an exclusive lock
+ * on the lock file to determine if we're the
+ * last ones using this page, and if not, we
+ * won't be shrinking it, and will instead exit
+ * prematurely.
+ */
+ ret = lock(lock_fd, LOCK_EX);
+
+ /* drop the lock on the lockfile, so that even
+ * if we couldn't shrink the file ourselves, we
+ * are signalling to other processes that we're
+ * no longer using this page.
+ */
+ if (unlock_segment(list_idx, seg_idx))
+ RTE_LOG(ERR, EAL, "Could not unlock segment\n");
+
+ /* additionally, if this was the last lock on
+ * this segment list, we can safely close the
+ * page file fd, so that one of the processes
+ * could then delete the file after shrinking.
+ */
+ if (ret < 1 && lock_fds[list_idx].count == 0) {
+ close(fd);
+ lock_fds[list_idx].memseg_list_fd = -1;
+ }
+
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Could not lock segment\n");
+ return -1;
+ }
+ if (ret == 0)
+ /* failed to lock, not an error. */
+ return 0;
+ }
+
+ /* grow or shrink the file */
+ ret = fallocate(fd, flags, fa_offset, page_sz);
+
+ if (ret < 0) {
+ if (fallocate_supported == -1 &&
+ errno == ENOTSUP) {
+ RTE_LOG(ERR, EAL, "%s(): fallocate() not supported, hugepage deallocation will be disabled\n",
+ __func__);
+ again = true;
+ fallocate_supported = 0;
+ } else {
+ RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n",
+ __func__,
+ strerror(errno));
+ return -1;
+ }
+ } else {
+ fallocate_supported = 1;
+
+ /* we've grown/shrunk the file, and we hold an
+ * exclusive lock now. check if there are no
+ * more segments active in this segment list,
+ * and remove the file if there aren't.
+ */
+ if (lock_fds[list_idx].count == 0) {
+ if (unlink(path))
+ RTE_LOG(ERR, EAL, "%s(): unlinking '%s' failed: %s\n",
+ __func__, path,
+ strerror(errno));
+ close(fd);
+ lock_fds[list_idx].memseg_list_fd = -1;
+ }
+ }
+ }
+ } while (again);
+ return 0;
+}
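
In single-file-segments mode, growing and shrinking the backing file both reduce to a single fallocate() call, as the function above shows. A stand-alone sketch of just that call (hypothetical helper name; assumes _GNU_SOURCE plus <fcntl.h>, <stdint.h> and <stdbool.h>):

	/* hypothetical helper, illustrative only */
	static int resize_one_page(int fd, uint64_t offset, uint64_t page_sz,
			bool grow)
	{
		/* growing reserves [offset, offset + page_sz); shrinking
		 * punches a hole there without changing the file size
		 */
		int flags = grow ? 0 :
				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

		return fallocate(fd, flags, offset, page_sz);
	}
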
+
+static int
+alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
+ struct hugepage_info *hi, unsigned int list_idx,
+ unsigned int seg_idx)
+{
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ int cur_socket_id = 0;
+#endif
+ uint64_t map_offset;
+ rte_iova_t iova;
+ void *va;
+ char path[PATH_MAX];
+ int ret = 0;
+ int fd;
+ size_t alloc_sz;
+ int flags;
+ void *new_addr;
+
+ alloc_sz = hi->hugepage_sz;
+ if (!internal_config.single_file_segments &&
+ internal_config.in_memory &&
+ anonymous_hugepages_supported) {
+ int log2, flags;
+
+ log2 = rte_log2_u32(alloc_sz);
+ /* as per the mmap() manpage, the huge page size is encoded as
+ * log2 of the page size, shifted by MAP_HUGE_SHIFT
+ */
+ flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ fd = -1;
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0);
+
+ /* the single-file segments codepath will never be active here,
+ * because in-memory mode is incompatible with it and is rejected
+ * at EAL initialization. however, the compiler doesn't know that
+ * and complains about map_offset being used uninitialized on the
+ * failure codepaths when in-memory mode is enabled, so assign a
+ * value here.
+ */
+ map_offset = 0;
+ } else {
+ /* takes out a read lock on segment or segment list */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
+ return -1;
+ }
+
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * alloc_sz;
+ ret = resize_hugefile(fd, path, list_idx, seg_idx,
+ map_offset, alloc_sz, true);
+ if (ret < 0)
+ goto resized;
+ } else {
+ map_offset = 0;
+ if (ftruncate(fd, alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ if (internal_config.hugepage_unlink) {
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ }
+ }
+
+ /*
+ * map the segment and populate page tables; the kernel fills
+ * this segment with zeros if it's a new page.
+ */
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd,
+ map_offset);
+ }
+
+ if (va == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
+ strerror(errno));
+ /* mmap failed, but the previous region might have been
+ * unmapped anyway. try to remap it
+ */
+ goto unmapped;
+ }
+ if (va != addr) {
+ RTE_LOG(DEBUG, EAL, "%s(): wrong mmap() address\n", __func__);
+ munmap(va, alloc_sz);
+ goto resized;
+ }
+
+ /* In Linux, hugetlb limitations, like cgroups, are
+ * enforced at fault time instead of at mmap(), even
+ * with the option of MAP_POPULATE. The kernel will
+ * send a SIGBUS signal. To avoid being killed, save
+ * the stack environment here; if SIGBUS happens, we
+ * can jump back to this point.
+ */
+ if (huge_wrap_sigsetjmp()) {
+ RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more hugepages of size %uMB\n",
+ (unsigned int)(alloc_sz >> 20));
+ goto mapped;
+ }
+
+ /* we need to trigger a write to the page to enforce a page fault and
+ * ensure that the page is accessible to us, but we can't overwrite the
+ * value that is already there, so read the old value and write it back.
+ * the kernel populates the page with zeroes initially.
+ */
+ *(volatile int *)addr = *(volatile int *)addr;
+
+ iova = rte_mem_virt2iova(addr);
+ if (iova == RTE_BAD_PHYS_ADDR) {
+ RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
+ __func__);
+ goto mapped;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
+
+ if (cur_socket_id != socket_id) {
+ RTE_LOG(DEBUG, EAL,
+ "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
+ __func__, socket_id, cur_socket_id);
+ goto mapped;
+ }
+#endif
+ /* for non-single-file segments that aren't in-memory, we can close the
+ * fd here
+ */
+ if (!internal_config.single_file_segments && !internal_config.in_memory)
+ close(fd);
+
+ ms->addr = addr;
+ ms->hugepage_sz = alloc_sz;
+ ms->len = alloc_sz;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+ ms->iova = iova;
+ ms->socket_id = socket_id;
+
+ return 0;
+
+mapped:
+ munmap(addr, alloc_sz);
+unmapped:
+ flags = MAP_FIXED;
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+ new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
+ if (new_addr != addr) {
+ if (new_addr != NULL)
+ munmap(new_addr, alloc_sz);
+ /* we're leaving a hole in our virtual address space. if
+ * somebody else maps this hole now, we could accidentally
+ * overwrite it in the future.
+ */
+ RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
+ }
+resized:
+ /* in-memory mode will never be single-file-segments mode */
+ if (internal_config.single_file_segments) {
+ resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
+ alloc_sz, false);
+ /* ignore failure, can't make it any worse */
+ } else {
+ /* only remove file if we can take out a write lock */
+ if (internal_config.hugepage_unlink == 0 &&
+ internal_config.in_memory == 0 &&
+ lock(fd, LOCK_EX) == 1)
+ unlink(path);
+ close(fd);
+ }
+ return -1;
+}
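
The in-memory, anonymous-hugepage branch of alloc_seg() encodes the page size directly into the mmap() flags. A minimal stand-alone sketch of that encoding (illustrative only; assumes MAP_HUGE_SHIFT is visible, e.g. via <linux/mman.h> on a recent kernel — the patch uses its own RTE_MAP_HUGE_SHIFT definition instead):

	/* illustrative: map one anonymous hugepage of size sz at addr */
	static void *map_anon_hugepage(void *addr, size_t sz)
	{
		int log2_sz = __builtin_ctzll(sz);	/* e.g. 21 for 2 MB */
		int flags = (log2_sz << MAP_HUGE_SHIFT) | MAP_HUGETLB |
				MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;

		return mmap(addr, sz, PROT_READ | PROT_WRITE, flags, -1, 0);
	}
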
+
+static int
+free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
+ unsigned int list_idx, unsigned int seg_idx)
+{
+ uint64_t map_offset;
+ char path[PATH_MAX];
+ int fd, ret;
+
+ /* erase page data */
+ memset(ms->addr, 0, ms->len);
+
+ if (mmap(ms->addr, ms->len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+ MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+ return -1;
+ }
+
+ /* if we've already unlinked the page, nothing needs to be done */
+ if (internal_config.hugepage_unlink) {
+ memset(ms, 0, sizeof(*ms));
+ return 0;
+ }
+
+ /* if we are not in single file segments mode, we're going to unmap the
+ * segment and thus drop the lock on the original fd, but the hugepage
+ * dir is now locked, so we can take out another one without races.
+ */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0)
+ return -1;
+
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * ms->len;
+ if (resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
+ ms->len, false))
+ return -1;
+ ret = 0;
+ } else {
+ /* if we're able to take out a write lock, we're the last one
+ * holding onto this page.
+ */
+ ret = lock(fd, LOCK_EX);
+ if (ret >= 0) {
+ /* no one else is using this page */
+ if (ret == 1)
+ unlink(path);
+ }
+ /* closing fd will drop the lock */
+ close(fd);
+ }
+
+ memset(ms, 0, sizeof(*ms));
+
+ return ret < 0 ? -1 : 0;
+}
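
Note how free_seg() does not simply munmap() the segment: it overmaps the range with an anonymous mapping so the virtual addresses stay reserved for later allocations. The idiom in isolation (hypothetical helper name, illustrative only):

	/* illustrative: drop the hugepage backing but keep the VA range
	 * reserved by replacing it with an anonymous mapping
	 */
	static int reserve_hole(void *addr, size_t len)
	{
		void *va = mmap(addr, len, PROT_READ,
				MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

		return va == MAP_FAILED ? -1 : 0;
	}
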
+
+struct alloc_walk_param {
+ struct hugepage_info *hi;
+ struct rte_memseg **ms;
+ size_t page_sz;
+ unsigned int segs_allocated;
+ unsigned int n_segs;
+ int socket;
+ bool exact;
+};
+static int
+alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct alloc_walk_param *wa = arg;
+ struct rte_memseg_list *cur_msl;
+ size_t page_sz;
+ int cur_idx, start_idx, j, dir_fd = -1;
+ unsigned int msl_idx, need, i;
+
+ if (msl->page_sz != wa->page_sz)
+ return 0;
+ if (msl->socket_id != wa->socket)
+ return 0;
+
+ page_sz = (size_t)msl->page_sz;
+
+ msl_idx = msl - mcfg->memsegs;
+ cur_msl = &mcfg->memsegs[msl_idx];
+
+ need = wa->n_segs;
+
+ /* try finding space in memseg list */
+ cur_idx = rte_fbarray_find_next_n_free(&cur_msl->memseg_arr, 0, need);
+ if (cur_idx < 0)
+ return 0;
+ start_idx = cur_idx;
+
+ /* do not allow any page allocations during the time we're allocating,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ *
+ * during init, we already hold a write lock, so don't try to take out
+ * another one.
+ */
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
+ dir_fd = open(wa->hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < need; i++, cur_idx++) {
+ struct rte_memseg *cur;
+ void *map_addr;
+
+ cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
+ map_addr = RTE_PTR_ADD(cur_msl->base_va,
+ cur_idx * page_sz);
+
+ if (alloc_seg(cur, map_addr, wa->socket, wa->hi,
+ msl_idx, cur_idx)) {
+ RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, but only %i were allocated\n",
+ need, i);
+
+ /* if exact number wasn't requested, stop */
+ if (!wa->exact)
+ goto out;
+
+ /* clean up */
+ for (j = start_idx; j < cur_idx; j++) {
+ struct rte_memseg *tmp;
+ struct rte_fbarray *arr =
+ &cur_msl->memseg_arr;
+
+ tmp = rte_fbarray_get(arr, j);
+ rte_fbarray_set_free(arr, j);
+
+ /* free_seg may attempt to create a file, which
+ * may fail.
+ */
+ if (free_seg(tmp, wa->hi, msl_idx, j))
+ RTE_LOG(DEBUG, EAL, "Cannot free page\n");
+ }
+ /* clear the list */
+ if (wa->ms)
+ memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);
+
+ if (dir_fd >= 0)
+ close(dir_fd);
+ return -1;
+ }
+ if (wa->ms)
+ wa->ms[i] = cur;
+
+ rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
+ }
+out:
+ wa->segs_allocated = i;
+ if (i > 0)
+ cur_msl->version++;
+ if (dir_fd >= 0)
+ close(dir_fd);
+ return 1;
+}
+
+struct free_walk_param {
+ struct hugepage_info *hi;
+ struct rte_memseg *ms;
+};
+static int
+free_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct free_walk_param *wa = arg;
+ uintptr_t start_addr, end_addr;
+ int msl_idx, seg_idx, ret, dir_fd = -1;
+
+ start_addr = (uintptr_t) msl->base_va;
+ end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz;
+
+ if ((uintptr_t)wa->ms->addr < start_addr ||
+ (uintptr_t)wa->ms->addr >= end_addr)
+ return 0;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;
+
+ /* msl is const */
+ found_msl = &mcfg->memsegs[msl_idx];
+
+ /* do not allow any page allocations during the time we're freeing,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ *
+ * during init, we already hold a write lock, so don't try to take out
+ * another one.
+ */
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
+ dir_fd = open(wa->hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+ }
+
+ found_msl->version++;
+
+ rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);
+
+ ret = free_seg(wa->ms, wa->hi, msl_idx, seg_idx);
+
+ if (dir_fd >= 0)
+ close(dir_fd);
+
+ if (ret < 0)
+ return -1;
+
+ return 1;
+}
+
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact)
+{
+ int i, ret = -1;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ bool have_numa = false;
+ int oldpolicy;
+ struct bitmask *oldmask;
+#endif
+ struct alloc_walk_param wa;
+ struct hugepage_info *hi = NULL;
+
+ memset(&wa, 0, sizeof(wa));
+
+ /* dynamic allocation not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+ if (page_sz ==
+ internal_config.hugepage_info[i].hugepage_sz) {
+ hi = &internal_config.hugepage_info[i];
+ break;
+ }
+ }
+ if (!hi) {
+ RTE_LOG(ERR, EAL, "%s(): can't find relevant hugepage_info entry\n",
+ __func__);
+ return -1;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (check_numa()) {
+ oldmask = numa_allocate_nodemask();
+ prepare_numa(&oldpolicy, oldmask, socket);
+ have_numa = true;
+ }
+#endif
+
+ wa.exact = exact;
+ wa.hi = hi;
+ wa.ms = ms;
+ wa.n_segs = n_segs;
+ wa.page_sz = page_sz;
+ wa.socket = socket;
+ wa.segs_allocated = 0;
+
+ /* memalloc is locked, so it's safe to use thread-unsafe version */
+ ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
+ if (ret == 0) {
+ RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
+ __func__);
+ ret = -1;
+ } else if (ret > 0) {
+ ret = (int)wa.segs_allocated;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (have_numa)
+ restore_numa(&oldpolicy, oldmask);
+#endif
+ return ret;
+}
+
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket)
+{
+ struct rte_memseg *ms;
+ if (eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true) < 0)
+ return NULL;
+ /* return pointer to newly allocated memseg */
+ return ms;
+}
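
A hypothetical caller of the bulk allocator above could look like this (illustrative only — these are EAL-internal functions, normally reached through rte_malloc/rte_memzone rather than called directly):

	/* ask for exactly two 2 MB pages on socket 0 */
	struct rte_memseg *segs[2];
	int n = eal_memalloc_alloc_seg_bulk(segs, 2, RTE_PGSIZE_2M, 0, true);

	if (n < 0)
		RTE_LOG(ERR, EAL, "bulk hugepage allocation failed\n");
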
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
+{
+ int seg, ret = 0;
+
+ /* dynamic free not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ for (seg = 0; seg < n_segs; seg++) {
+ struct rte_memseg *cur = ms[seg];
+ struct hugepage_info *hi = NULL;
+ struct free_walk_param wa;
+ int i, walk_res;
+
+ /* if this page is marked as unfreeable, fail */
+ if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n");
+ ret = -1;
+ continue;
+ }
+
+ memset(&wa, 0, sizeof(wa));
+
+ for (i = 0; i < (int)RTE_DIM(internal_config.hugepage_info);
+ i++) {
+ hi = &internal_config.hugepage_info[i];
+ if (cur->hugepage_sz == hi->hugepage_sz)
+ break;
+ }
+ if (i == (int)RTE_DIM(internal_config.hugepage_info)) {
+ RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+ ret = -1;
+ continue;
+ }
+
+ wa.ms = cur;
+ wa.hi = hi;
+
+ /* memalloc is locked, so it's safe to use thread-unsafe version
+ */
+ walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
+ &wa);
+ if (walk_res == 1)
+ continue;
+ if (walk_res == 0)
+ RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms)
+{
+ /* dynamic free not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ return eal_memalloc_free_seg_bulk(&ms, 1);
+}
+
+static int
+sync_chunk(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx, bool used, int start, int end)
+{
+ struct rte_fbarray *l_arr, *p_arr;
+ int i, ret, chunk_len, diff_len;
+
+ l_arr = &local_msl->memseg_arr;
+ p_arr = &primary_msl->memseg_arr;
+
+ /* we need to aggregate allocations/deallocations into bigger chunks,
+ * as we don't want to spam the user with per-page callbacks.
+ *
+ * to avoid any potential issues, we also want to trigger
+ * deallocation callbacks *before* we actually deallocate
+ * memory, so that the user application could wrap up its use
+ * before it goes away.
+ */
+
+ chunk_len = end - start;
+
+ /* find how many contiguous pages we can map/unmap for this chunk */
+ diff_len = used ?
+ rte_fbarray_find_contig_free(l_arr, start) :
+ rte_fbarray_find_contig_used(l_arr, start);
+
+ /* has to be at least one page */
+ if (diff_len < 1)
+ return -1;
+
+ diff_len = RTE_MIN(chunk_len, diff_len);
+
+ /* if we are freeing memory, notify the application */
+ if (!used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len, page_sz;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ page_sz = (size_t)primary_msl->page_sz;
+ len = page_sz * diff_len;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ start_va, len);
+ }
+
+ for (i = 0; i < diff_len; i++) {
+ struct rte_memseg *p_ms, *l_ms;
+ int seg_idx = start + i;
+
+ l_ms = rte_fbarray_get(l_arr, seg_idx);
+ p_ms = rte_fbarray_get(p_arr, seg_idx);
+
+ if (l_ms == NULL || p_ms == NULL)
+ return -1;
+
+ if (used) {
+ ret = alloc_seg(l_ms, p_ms->addr,
+ p_ms->socket_id, hi,
+ msl_idx, seg_idx);
+ if (ret < 0)
+ return -1;
+ rte_fbarray_set_used(l_arr, seg_idx);
+ } else {
+ ret = free_seg(l_ms, hi, msl_idx, seg_idx);
+ rte_fbarray_set_free(l_arr, seg_idx);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ /* if we just allocated memory, notify the application */
+ if (used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len, page_sz;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ page_sz = (size_t)primary_msl->page_sz;
+ len = page_sz * diff_len;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ start_va, len);
+ }
+
+ /* calculate how much we can advance until next chunk */
+ diff_len = used ?
+ rte_fbarray_find_contig_used(l_arr, start) :
+ rte_fbarray_find_contig_free(l_arr, start);
+ ret = RTE_MIN(chunk_len, diff_len);
+
+ return ret;
+}
+
+static int
+sync_status(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx, bool used)
+{
+ struct rte_fbarray *l_arr, *p_arr;
+ int p_idx, l_chunk_len, p_chunk_len, ret;
+ int start, end;
+
+ /* this is a little bit tricky, but the basic idea is - walk both lists
+ * and spot any places where there are discrepancies. walking both lists
+ * and noting discrepancies in a single go is a hard problem, so we do
+ * it in two passes - first we spot any places where allocated segments
+ * mismatch (i.e. ensure that everything that's allocated in the primary
+ * is also allocated in the secondary), and then we do it by looking at
+ * free segments instead.
+ *
+ * we also need to aggregate changes into chunks, as we have to call
+ * callbacks per allocation, not per page.
+ */
+ l_arr = &local_msl->memseg_arr;
+ p_arr = &primary_msl->memseg_arr;
+
+ if (used)
+ p_idx = rte_fbarray_find_next_used(p_arr, 0);
+ else
+ p_idx = rte_fbarray_find_next_free(p_arr, 0);
+
+ while (p_idx >= 0) {
+ int next_chunk_search_idx;
+
+ if (used) {
+ p_chunk_len = rte_fbarray_find_contig_used(p_arr,
+ p_idx);
+ l_chunk_len = rte_fbarray_find_contig_used(l_arr,
+ p_idx);
+ } else {
+ p_chunk_len = rte_fbarray_find_contig_free(p_arr,
+ p_idx);
+ l_chunk_len = rte_fbarray_find_contig_free(l_arr,
+ p_idx);
+ }
+ /* best case scenario - no differences (or bigger, which will be
+ * fixed during next iteration), look for next chunk
+ */
+ if (l_chunk_len >= p_chunk_len) {
+ next_chunk_search_idx = p_idx + p_chunk_len;
+ goto next_chunk;
+ }
+
+ /* if both chunks start at the same point, skip parts we know
+ * are identical, and sync the rest. each call to sync_chunk
+ * will only sync contiguous segments, so we need to call this
+ * until we are sure there are no more differences in this
+ * chunk.
+ */
+ start = p_idx + l_chunk_len;
+ end = p_idx + p_chunk_len;
+ do {
+ ret = sync_chunk(primary_msl, local_msl, hi, msl_idx,
+ used, start, end);
+ start += ret;
+ } while (start < end && ret >= 0);
+ /* if ret is negative, something went wrong */
+ if (ret < 0)
+ return -1;
+
+ next_chunk_search_idx = p_idx + p_chunk_len;
+next_chunk:
+ /* skip to end of this chunk */
+ if (used) {
+ p_idx = rte_fbarray_find_next_used(p_arr,
+ next_chunk_search_idx);
+ } else {
+ p_idx = rte_fbarray_find_next_free(p_arr,
+ next_chunk_search_idx);
+ }
+ }
+ return 0;
+}
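
As a small worked example of the two passes (hypothetical 8-segment list, 1 = used):

	primary:   1 1 1 0 0 1 0 0
	secondary: 1 0 0 0 0 0 0 0

In the "used" pass, the primary chunk at index 0 is 3 segments long but the local one is only 1, so sync_chunk() allocates segments 1-2; the used segment at index 5 has no local counterpart, so it is allocated as well. After that, the free maps already match, so the "free" pass finds nothing to do.
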
+
+static int
+sync_existing(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx)
+{
+ int ret, dir_fd;
+
+ /* do not allow any page allocations during the time we're allocating,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ */
+ dir_fd = open(hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", __func__,
+ hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", __func__,
+ hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+
+ /* ensure all allocated space is the same in both lists */
+ ret = sync_status(primary_msl, local_msl, hi, msl_idx, true);
+ if (ret < 0)
+ goto fail;
+
+ /* ensure all unallocated space is the same in both lists */
+ ret = sync_status(primary_msl, local_msl, hi, msl_idx, false);
+ if (ret < 0)
+ goto fail;
+
+ /* update version number */
+ local_msl->version = primary_msl->version;
+
+ close(dir_fd);
+
+ return 0;
+fail:
+ close(dir_fd);
+ return -1;
+}
+
+static int
+sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *primary_msl, *local_msl;
+ struct hugepage_info *hi = NULL;
+ unsigned int i;
+ int msl_idx;
+
+ msl_idx = msl - mcfg->memsegs;
+ primary_msl = &mcfg->memsegs[msl_idx];
+ local_msl = &local_memsegs[msl_idx];
+
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ uint64_t cur_sz =
+ internal_config.hugepage_info[i].hugepage_sz;
+ uint64_t msl_sz = primary_msl->page_sz;
+ if (msl_sz == cur_sz) {
+ hi = &internal_config.hugepage_info[i];
+ break;
+ }
+ }
+ if (!hi) {
+ RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+ return -1;
+ }
+
+ /* if versions don't match, synchronize everything */
+ if (local_msl->version != primary_msl->version &&
+ sync_existing(primary_msl, local_msl, hi, msl_idx))
+ return -1;
+ return 0;
+}
+
+
+int
+eal_memalloc_sync_with_primary(void)
+{
+ /* nothing to be done in primary */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return 0;
+
+ /* memalloc is locked, so it's safe to call thread-unsafe version */
+ if (rte_memseg_list_walk_thread_unsafe(sync_walk, NULL))
+ return -1;
+ return 0;
+}
+
+static int
+secondary_msl_create_walk(const struct rte_memseg_list *msl,
+ void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *primary_msl, *local_msl;
+ char name[PATH_MAX];
+ int msl_idx, ret;
+
+ msl_idx = msl - mcfg->memsegs;
+ primary_msl = &mcfg->memsegs[msl_idx];
+ local_msl = &local_memsegs[msl_idx];
+
+ /* create distinct fbarrays for each secondary */
+ snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
+ primary_msl->memseg_arr.name, getpid());
+
+ ret = rte_fbarray_init(&local_msl->memseg_arr, name,
+ primary_msl->memseg_arr.len,
+ primary_msl->memseg_arr.elt_sz);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Cannot initialize local memory map\n");
+ return -1;
+ }
+ local_msl->base_va = primary_msl->base_va;
+
+ return 0;
+}
+
+static int
+secondary_lock_list_create_walk(const struct rte_memseg_list *msl,
+ void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i, len;
+ int msl_idx;
+ int *data;
+
+ msl_idx = msl - mcfg->memsegs;
+ len = msl->memseg_arr.len;
+
+ /* ensure we have space to store a lock fd for each possible segment */
+ data = malloc(sizeof(int) * len);
+ if (data == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate space for lock descriptors\n");
+ return -1;
+ }
+ /* set all fd's as invalid */
+ for (i = 0; i < len; i++)
+ data[i] = -1;
+
+ lock_fds[msl_idx].fds = data;
+ lock_fds[msl_idx].len = len;
+ lock_fds[msl_idx].count = 0;
+ lock_fds[msl_idx].memseg_list_fd = -1;
+
+ return 0;
+}
+
+int
+eal_memalloc_init(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0)
+ return -1;
+
+ /* initialize all of the lock fd lists */
+ if (internal_config.single_file_segments)
+ if (rte_memseg_list_walk(secondary_lock_list_create_walk, NULL))
+ return -1;
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 38853b75..dbf19499 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -28,6 +28,7 @@
#include <numaif.h>
#endif
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_launch.h>
@@ -39,6 +40,7 @@
#include <rte_string_fns.h>
#include "eal_private.h"
+#include "eal_memalloc.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_hugepages.h"
@@ -57,8 +59,6 @@
* zone as well as a physical contiguous zone.
*/
-static uint64_t baseaddr_offset;
-
static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -66,7 +66,7 @@ static bool phys_addrs_available = true;
static void
test_phys_addrs_available(void)
{
- uint64_t tmp;
+ uint64_t tmp = 0;
phys_addr_t physaddr;
if (!rte_eal_has_hugepages()) {
@@ -221,82 +221,6 @@ aslr_enabled(void)
}
}
-/*
- * Try to mmap *size bytes in /dev/zero. If it is successful, return the
- * pointer to the mmap'd area and keep *size unmodified. Else, retry
- * with a smaller zone: decrease *size by hugepage_sz until it reaches
- * 0. In this case, return NULL. Note: this function returns an address
- * which is a multiple of hugepage size.
- */
-static void *
-get_virtual_area(size_t *size, size_t hugepage_sz)
-{
- void *addr;
- void *addr_hint;
- int fd;
- long aligned_addr;
-
- if (internal_config.base_virtaddr != 0) {
- int page_size = sysconf(_SC_PAGE_SIZE);
- addr_hint = (void *) (uintptr_t)
- (internal_config.base_virtaddr + baseaddr_offset);
- addr_hint = RTE_PTR_ALIGN_FLOOR(addr_hint, page_size);
- } else {
- addr_hint = NULL;
- }
-
- RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
-
-
- fd = open("/dev/zero", O_RDONLY);
- if (fd < 0){
- RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
- return NULL;
- }
- do {
- addr = mmap(addr_hint, (*size) + hugepage_sz, PROT_READ,
-#ifdef RTE_ARCH_PPC_64
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
- MAP_PRIVATE,
-#endif
- fd, 0);
- if (addr == MAP_FAILED) {
- *size -= hugepage_sz;
- } else if (addr_hint != NULL && addr != addr_hint) {
- RTE_LOG(WARNING, EAL, "WARNING! Base virtual address "
- "hint (%p != %p) not respected!\n",
- addr_hint, addr);
- RTE_LOG(WARNING, EAL, " This may cause issues with "
- "mapping memory into secondary processes\n");
- }
- } while (addr == MAP_FAILED && *size > 0);
-
- if (addr == MAP_FAILED) {
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
- strerror(errno));
- return NULL;
- }
-
- munmap(addr, (*size) + hugepage_sz);
- close(fd);
-
- /* align addr to a huge page size boundary */
- aligned_addr = (long)addr;
- aligned_addr += (hugepage_sz - 1);
- aligned_addr &= (~(hugepage_sz - 1));
- addr = (void *)(aligned_addr);
-
- RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
- addr, *size);
-
- /* increment offset */
- baseaddr_offset += *size;
-
- return addr;
-}
-
static sigjmp_buf huge_jmpenv;
static void huge_sigbus_handler(int signo __rte_unused)
@@ -330,13 +254,11 @@ void numa_error(char *where)
*/
static unsigned
map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
- uint64_t *essential_memory __rte_unused, int orig)
+ uint64_t *essential_memory __rte_unused)
{
int fd;
unsigned i;
void *virtaddr;
- void *vma_addr = NULL;
- size_t vma_len = 0;
#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
int node_id = -1;
int essential_prev = 0;
@@ -351,7 +273,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
have_numa = false;
}
- if (orig && have_numa) {
+ if (have_numa) {
RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
if (get_mempolicy(&oldpolicy, oldmask->maskp,
oldmask->size + 1, 0, 0) < 0) {
@@ -367,6 +289,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
#endif
for (i = 0; i < hpi->num_pages[0]; i++) {
+ struct hugepage_file *hf = &hugepg_tbl[i];
uint64_t hugepage_sz = hpi->hugepage_sz;
#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
@@ -401,57 +324,14 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
}
#endif
- if (orig) {
- hugepg_tbl[i].file_id = i;
- hugepg_tbl[i].size = hugepage_sz;
- eal_get_hugefile_path(hugepg_tbl[i].filepath,
- sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
- hugepg_tbl[i].file_id);
- hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
- }
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, don't remap 1G and 16G pages, just reuse
- * original map address as final map address.
- */
- else if ((hugepage_sz == RTE_PGSIZE_1G)
- || (hugepage_sz == RTE_PGSIZE_16G)) {
- hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
- hugepg_tbl[i].orig_va = NULL;
- continue;
- }
-#endif
- else if (vma_len == 0) {
- unsigned j, num_pages;
-
- /* reserve a virtual area for next contiguous
- * physical block: count the number of
- * contiguous physical pages. */
- for (j = i+1; j < hpi->num_pages[0] ; j++) {
-#ifdef RTE_ARCH_PPC_64
- /* The physical addresses are sorted in
- * descending order on PPC64 */
- if (hugepg_tbl[j].physaddr !=
- hugepg_tbl[j-1].physaddr - hugepage_sz)
- break;
-#else
- if (hugepg_tbl[j].physaddr !=
- hugepg_tbl[j-1].physaddr + hugepage_sz)
- break;
-#endif
- }
- num_pages = j - i;
- vma_len = num_pages * hugepage_sz;
-
- /* get the biggest virtual memory area up to
- * vma_len. If it fails, vma_addr is NULL, so
- * let the kernel provide the address. */
- vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
- if (vma_addr == NULL)
- vma_len = hugepage_sz;
- }
+ hf->file_id = i;
+ hf->size = hugepage_sz;
+ eal_get_hugefile_path(hf->filepath, sizeof(hf->filepath),
+ hpi->hugedir, hf->file_id);
+ hf->filepath[sizeof(hf->filepath) - 1] = '\0';
/* try to create hugepage file */
- fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
+ fd = open(hf->filepath, O_CREAT | O_RDWR, 0600);
if (fd < 0) {
RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
strerror(errno));
@@ -459,8 +339,11 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
}
/* map the segment, and populate page tables,
- * the kernel fills this segment with zeros */
- virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
+ * the kernel fills this segment with zeros. we don't care where
+ * this gets mapped - we already have contiguous memory areas
+ * ready for us to map into.
+ */
+ virtaddr = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
if (virtaddr == MAP_FAILED) {
RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
@@ -469,41 +352,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
goto out;
}
- if (orig) {
- hugepg_tbl[i].orig_va = virtaddr;
- }
- else {
- hugepg_tbl[i].final_va = virtaddr;
- }
+ hf->orig_va = virtaddr;
- if (orig) {
- /* In linux, hugetlb limitations, like cgroup, are
- * enforced at fault time instead of mmap(), even
- * with the option of MAP_POPULATE. Kernel will send
- * a SIGBUS signal. To avoid to be killed, save stack
- * environment here, if SIGBUS happens, we can jump
- * back here.
- */
- if (huge_wrap_sigsetjmp()) {
- RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
- "hugepages of size %u MB\n",
- (unsigned)(hugepage_sz / 0x100000));
- munmap(virtaddr, hugepage_sz);
- close(fd);
- unlink(hugepg_tbl[i].filepath);
+ /* In linux, hugetlb limitations, like cgroup, are
+ * enforced at fault time instead of mmap(), even
+ * with the option of MAP_POPULATE. Kernel will send
+ * a SIGBUS signal. To avoid being killed, save stack
+ * environment here, if SIGBUS happens, we can jump
+ * back here.
+ */
+ if (huge_wrap_sigsetjmp()) {
+ RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
+ "hugepages of size %u MB\n",
+ (unsigned int)(hugepage_sz / 0x100000));
+ munmap(virtaddr, hugepage_sz);
+ close(fd);
+ unlink(hugepg_tbl[i].filepath);
#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (maxnode)
- essential_memory[node_id] =
- essential_prev;
+ if (maxnode)
+ essential_memory[node_id] =
+ essential_prev;
#endif
- goto out;
- }
- *(int *)virtaddr = 0;
+ goto out;
}
+ *(int *)virtaddr = 0;
-
- /* set shared flock on the file. */
- if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
__func__, strerror(errno));
close(fd);
@@ -511,9 +386,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
}
close(fd);
-
- vma_addr = (char *)vma_addr + hugepage_sz;
- vma_len -= hugepage_sz;
}
out:
@@ -535,20 +407,6 @@ out:
return i;
}
-/* Unmap all hugepages from original mapping */
-static int
-unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
-{
- unsigned i;
- for (i = 0; i < hpi->num_pages[0]; i++) {
- if (hugepg_tbl[i].orig_va) {
- munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
- hugepg_tbl[i].orig_va = NULL;
- }
- }
- return 0;
-}
-
/*
* Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
* page.
@@ -663,7 +521,18 @@ static void *
create_shared_memory(const char *filename, const size_t mem_size)
{
void *retval;
- int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ int fd;
+
+ /* if no shared files mode is used, create anonymous memory instead */
+ if (internal_config.no_shconf) {
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (retval == MAP_FAILED)
+ return NULL;
+ return retval;
+ }
+
+ fd = open(filename, O_CREAT | O_RDWR, 0666);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
@@ -688,7 +557,7 @@ copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
int src_pos, dst_pos = 0;
for (src_pos = 0; src_pos < src_size; src_pos++) {
- if (src[src_pos].final_va != NULL) {
+ if (src[src_pos].orig_va != NULL) {
/* error on overflow attempt */
if (dst_pos == dest_size)
return -1;
@@ -759,9 +628,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
unmap_len = hp->size;
/* get start addr and len of the remaining segment */
- munmap(hp->final_va, (size_t) unmap_len);
+ munmap(hp->orig_va,
+ (size_t)unmap_len);
- hp->final_va = NULL;
+ hp->orig_va = NULL;
if (unlink(hp->filepath) == -1) {
RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
__func__, hp->filepath, strerror(errno));
@@ -780,6 +650,436 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
return 0;
}
+static int
+remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int cur_page, seg_len;
+ unsigned int msl_idx;
+ int ms_idx;
+ uint64_t page_sz;
+ size_t memseg_len;
+ int socket_id;
+
+ page_sz = hugepages[seg_start].size;
+ socket_id = hugepages[seg_start].socket_id;
+ seg_len = seg_end - seg_start;
+
+ RTE_LOG(DEBUG, EAL, "Attempting to map %" PRIu64 "M on socket %i\n",
+ (seg_len * page_sz) >> 20ULL, socket_id);
+
+ /* find free space in memseg lists */
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ bool empty;
+ msl = &mcfg->memsegs[msl_idx];
+ arr = &msl->memseg_arr;
+
+ if (msl->page_sz != page_sz)
+ continue;
+ if (msl->socket_id != socket_id)
+ continue;
+
+ /* leave space for a hole if array is not empty */
+ empty = arr->count == 0;
+ ms_idx = rte_fbarray_find_next_n_free(arr, 0,
+ seg_len + (empty ? 0 : 1));
+
+ /* memseg list is full? */
+ if (ms_idx < 0)
+ continue;
+
+ /* leave some space between memsegs, they are not IOVA
+ * contiguous, so they shouldn't be VA contiguous either.
+ */
+ if (!empty)
+ ms_idx++;
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
+ RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
+ return -1;
+ }
+
+#ifdef RTE_ARCH_PPC64
+ /* for PPC64 we go through the list backwards */
+ for (cur_page = seg_end - 1; cur_page >= seg_start;
+ cur_page--, ms_idx++) {
+#else
+ for (cur_page = seg_start; cur_page < seg_end; cur_page++, ms_idx++) {
+#endif
+ struct hugepage_file *hfile = &hugepages[cur_page];
+ struct rte_memseg *ms = rte_fbarray_get(arr, ms_idx);
+ void *addr;
+ int fd;
+
+ fd = open(hfile->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ return -1;
+ }
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
+ RTE_LOG(DEBUG, EAL, "Could not lock '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ memseg_len = (size_t)page_sz;
+ addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
+
+ /* we know this address is already mmapped by memseg list, so
+ * using MAP_FIXED here is safe
+ */
+ addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Couldn't remap '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ /* we have a new address, so unmap previous one */
+#ifndef RTE_ARCH_64
+ /* in 32-bit legacy mode, we have already unmapped the page */
+ if (!internal_config.legacy_mem)
+ munmap(hfile->orig_va, page_sz);
+#else
+ munmap(hfile->orig_va, page_sz);
+#endif
+
+ hfile->orig_va = NULL;
+ hfile->final_va = addr;
+
+ /* rewrite physical addresses in IOVA as VA mode */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ hfile->physaddr = (uintptr_t)addr;
+
+ /* set up memseg data */
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->len = memseg_len;
+ ms->iova = hfile->physaddr;
+ ms->socket_id = hfile->socket_id;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+
+ rte_fbarray_set_used(arr, ms_idx);
+
+ close(fd);
+ }
+ RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
+ (seg_len * page_sz) >> 20, socket_id);
+ return 0;
+}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
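
As a worked example (treating the build-time defaults as an assumption, e.g. RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768): with 2 MB pages and no external limit, the list size is min(2 MB * 8192, 32 GB) = 16 GB; with 1 GB pages the page-count limit would allow far more, so the list is capped by the memory limit at 32 GB, which is already a multiple of the page size.
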
+
+static int
+free_memseg_list(struct rte_memseg_list *msl)
+{
+ if (rte_fbarray_destroy(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
+ return -1;
+ }
+ memset(msl, 0, sizeof(*msl));
+ return 0;
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+/*
+ * Our VA space is not preallocated yet, so preallocate it here. We need to know
+ * how many segments there are in order to map all pages into one address space,
+ * and leave appropriate holes between segments so that rte_malloc does not
+ * concatenate them into one big segment.
+ *
+ * We also need to unmap the original pages to free up address space.
+ */
+static int __rte_unused
+prealloc_segments(struct hugepage_file *hugepages, int n_pages)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int cur_page, seg_start_page, end_seg, new_memseg;
+ unsigned int hpi_idx, socket, i;
+ int n_contig_segs, n_segs;
+ int msl_idx;
+
+ /* before we preallocate segments, we need to free up our VA space.
+ * we're not removing files, and we already have information about
+ * PA-contiguousness, so it is safe to unmap everything.
+ */
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *hpi = &hugepages[cur_page];
+ munmap(hpi->orig_va, hpi->size);
+ hpi->orig_va = NULL;
+ }
+
+ /* we cannot know how many page sizes and sockets we have discovered, so
+ * loop over all of them
+ */
+ for (hpi_idx = 0; hpi_idx < internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t page_sz =
+ internal_config.hugepage_info[hpi_idx].hugepage_sz;
+
+ for (i = 0; i < rte_socket_count(); i++) {
+ struct rte_memseg_list *msl;
+
+ socket = rte_socket_id_by_idx(i);
+ n_contig_segs = 0;
+ n_segs = 0;
+ seg_start_page = -1;
+
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *prev, *cur;
+ int prev_seg_start_page = -1;
+
+ cur = &hugepages[cur_page];
+ prev = cur_page == 0 ? NULL :
+ &hugepages[cur_page - 1];
+
+ new_memseg = 0;
+ end_seg = 0;
+
+ if (cur->size == 0)
+ end_seg = 1;
+ else if (cur->socket_id != (int) socket)
+ end_seg = 1;
+ else if (cur->size != page_sz)
+ end_seg = 1;
+ else if (cur_page == 0)
+ new_memseg = 1;
+#ifdef RTE_ARCH_PPC_64
+ /* On the PPC64 architecture, mmap always starts
+ * from a higher address to a lower address. Here,
+ * physical addresses are in descending order.
+ */
+ else if ((prev->physaddr - cur->physaddr) !=
+ cur->size)
+ new_memseg = 1;
+#else
+ else if ((cur->physaddr - prev->physaddr) !=
+ cur->size)
+ new_memseg = 1;
+#endif
+ if (new_memseg) {
+ /* if we're already inside a segment,
+ * new segment means end of current one
+ */
+ if (seg_start_page != -1) {
+ end_seg = 1;
+ prev_seg_start_page =
+ seg_start_page;
+ }
+ seg_start_page = cur_page;
+ }
+
+ if (end_seg) {
+ if (prev_seg_start_page != -1) {
+ /* we've found a new segment */
+ n_contig_segs++;
+ n_segs += cur_page -
+ prev_seg_start_page;
+ } else if (seg_start_page != -1) {
+ /* we didn't find new segment,
+ * but did end current one
+ */
+ n_contig_segs++;
+ n_segs += cur_page -
+ seg_start_page;
+ seg_start_page = -1;
+ continue;
+ } else {
+ /* we're skipping this page */
+ continue;
+ }
+ }
+ /* segment continues */
+ }
+ /* check if we missed last segment */
+ if (seg_start_page != -1) {
+ n_contig_segs++;
+ n_segs += cur_page - seg_start_page;
+ }
+
+ /* if no segments were found, do not preallocate */
+ if (n_segs == 0)
+ continue;
+
+ /* we now have total number of pages that we will
+ * allocate for this segment list. add separator pages
+ * to the total count, and preallocate VA space.
+ */
+ n_segs += n_contig_segs - 1;
+
+ /* now, preallocate VA space for these segments */
+
+ /* first, find suitable memseg list for this */
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
+ msl_idx++) {
+ msl = &mcfg->memsegs[msl_idx];
+
+ if (msl->base_va != NULL)
+ continue;
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ /* now, allocate fbarray itself */
+ if (alloc_memseg_list(msl, page_sz, n_segs, socket,
+ msl_idx) < 0)
+ return -1;
+
+ /* finally, allocate VA space */
+ if (alloc_va_space(msl) < 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * We cannot reallocate memseg lists on the fly because PPC64 stores pages
+ * backwards; therefore, we have to process the entire memseg first before
+ * remapping it into memseg list VA space.
+ */
+static int
+remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
+{
+ int cur_page, seg_start_page, new_memseg, ret;
+
+ seg_start_page = 0;
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *prev, *cur;
+
+ new_memseg = 0;
+
+ cur = &hugepages[cur_page];
+ prev = cur_page == 0 ? NULL : &hugepages[cur_page - 1];
+
+ /* if size is zero, no more pages left */
+ if (cur->size == 0)
+ break;
+
+ if (cur_page == 0)
+ new_memseg = 1;
+ else if (cur->socket_id != prev->socket_id)
+ new_memseg = 1;
+ else if (cur->size != prev->size)
+ new_memseg = 1;
+#ifdef RTE_ARCH_PPC_64
+ /* On the PPC64 architecture, mmap always starts from a higher
+ * address to a lower address. Here, physical addresses are in
+ * descending order.
+ */
+ else if ((prev->physaddr - cur->physaddr) != cur->size)
+ new_memseg = 1;
+#else
+ else if ((cur->physaddr - prev->physaddr) != cur->size)
+ new_memseg = 1;
+#endif
+
+ if (new_memseg) {
+ /* if this isn't the first time, remap segment */
+ if (cur_page != 0) {
+ ret = remap_segment(hugepages, seg_start_page,
+ cur_page);
+ if (ret != 0)
+ return -1;
+ }
+ /* remember where we started */
+ seg_start_page = cur_page;
+ }
+ /* continuation of previous memseg */
+ }
+ /* we were stopped, but we didn't remap the last segment, do it now */
+ if (cur_page != 0) {
+ ret = remap_segment(hugepages, seg_start_page,
+ cur_page);
+ if (ret != 0)
+ return -1;
+ }
+ return 0;
+}
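
To illustrate the contiguity check above with hypothetical numbers: with 2 MB pages at physical addresses 0x40000000, 0x40200000 and 0x40400000, each PA differs from the previous one by exactly cur->size, so all three land in one memseg; a fourth page at 0x80000000 breaks the chain, so remap_segment() is called for the first three pages and a new memseg starts at the fourth.
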
+
static inline uint64_t
get_socket_mem_size(int socket)
{
@@ -788,8 +1088,7 @@ get_socket_mem_size(int socket)
for (i = 0; i < internal_config.num_hugepage_sizes; i++){
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (hpi->hugedir != NULL)
- size += hpi->hugepage_sz * hpi->num_pages[socket];
+ size += hpi->hugepage_sz * hpi->num_pages[socket];
}
return size;
@@ -818,8 +1117,10 @@ calc_num_pages_per_socket(uint64_t * memory,
/* if specific memory amounts per socket weren't requested */
if (internal_config.force_sockets == 0) {
+ size_t total_size;
+#ifdef RTE_ARCH_64
int cpu_per_socket[RTE_MAX_NUMA_NODES];
- size_t default_size, total_size;
+ size_t default_size;
unsigned lcore_id;
/* Compute number of cores per socket */
@@ -837,7 +1138,7 @@ calc_num_pages_per_socket(uint64_t * memory,
/* Set memory amount per socket */
default_size = (internal_config.memory * cpu_per_socket[socket])
- / rte_lcore_count();
+ / rte_lcore_count();
/* Limit to maximum available memory on socket */
default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
@@ -854,18 +1155,40 @@ calc_num_pages_per_socket(uint64_t * memory,
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
/* take whatever is available */
default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
- total_size);
+ total_size);
/* Update sizes */
memory[socket] += default_size;
total_size -= default_size;
}
+#else
+ /* in 32-bit mode, allocate all of the memory only on master
+ * lcore socket
+ */
+ total_size = internal_config.memory;
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
+ socket++) {
+ struct rte_config *cfg = rte_eal_get_configuration();
+ unsigned int master_lcore_socket;
+
+ master_lcore_socket =
+ rte_lcore_to_socket_id(cfg->master_lcore);
+
+ if (master_lcore_socket != socket)
+ continue;
+
+ /* Update sizes */
+ memory[socket] = total_size;
+ break;
+ }
+#endif
}
for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
/* skips if the memory on specific socket wasn't requested */
for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
- hp_used[i].hugedir = hp_info[i].hugedir;
+ strlcpy(hp_used[i].hugedir, hp_info[i].hugedir,
+ sizeof(hp_used[i].hugedir));
hp_used[i].num_pages[socket] = RTE_MIN(
memory[socket] / hp_info[i].hugepage_sz,
hp_info[i].num_pages[socket]);
@@ -907,7 +1230,8 @@ calc_num_pages_per_socket(uint64_t * memory,
}
}
/* if we didn't satisfy all memory requirements per socket */
- if (memory[socket] > 0) {
+ if (memory[socket] > 0 &&
+ internal_config.socket_mem[socket] != 0) {
/* to prevent icc errors */
requested = (unsigned) (internal_config.socket_mem[socket] /
0x100000);
@@ -939,7 +1263,7 @@ eal_get_hugepage_mem_size(void)
for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (hpi->hugedir != NULL) {
+ if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
size += hpi->hugepage_sz * hpi->num_pages[j];
}
@@ -987,17 +1311,19 @@ huge_recover_sigbus(void)
* 6. unmap the first mapping
* 7. fill memsegs in configuration with contiguous zones
*/
-int
-rte_eal_hugepage_init(void)
+static int
+eal_legacy_hugepage_init(void)
{
struct rte_mem_config *mcfg;
struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+ struct rte_fbarray *arr;
+ struct rte_memseg *ms;
uint64_t memory[RTE_MAX_NUMA_NODES];
unsigned hp_offset;
- int i, j, new_memseg;
+ int i, j;
int nr_hugefiles, nr_hugepages = 0;
void *addr;
@@ -1010,21 +1336,54 @@ rte_eal_hugepage_init(void)
/* hugetlbfs can be disabled */
if (internal_config.no_hugetlbfs) {
+ struct rte_memseg_list *msl;
+ uint64_t page_sz;
+ int n_segs, cur_seg;
+
+ /* nohuge mode is legacy mode */
+ internal_config.legacy_mem = 1;
+
+ /* create a memseg list */
+ msl = &mcfg->memsegs[0];
+
+ page_sz = RTE_PGSIZE_4K;
+ n_segs = internal_config.memory / page_sz;
+
+ if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
strerror(errno));
return -1;
}
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
- mcfg->memseg[0].iova = (uintptr_t)addr;
- else
- mcfg->memseg[0].iova = RTE_BAD_IOVA;
- mcfg->memseg[0].addr = addr;
- mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
- mcfg->memseg[0].len = internal_config.memory;
- mcfg->memseg[0].socket_id = 0;
+ msl->base_va = addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = 0;
+
+ /* populate memsegs. each memseg is one page long */
+ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
+ arr = &msl->memseg_arr;
+
+ ms = rte_fbarray_get(arr, cur_seg);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ms->iova = (uintptr_t)addr;
+ else
+ ms->iova = RTE_BAD_IOVA;
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->socket_id = 0;
+ ms->len = page_sz;
+
+ rte_fbarray_set_used(arr, cur_seg);
+
+ addr = RTE_PTR_ADD(addr, (size_t)page_sz);
+ }
return 0;
}
@@ -1057,7 +1416,6 @@ rte_eal_hugepage_init(void)
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
memory[i] = internal_config.socket_mem[i];
-
/* map all hugepages and sort them */
for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
unsigned pages_old, pages_new;
@@ -1075,8 +1433,7 @@ rte_eal_hugepage_init(void)
/* map all hugepages available */
pages_old = hpi->num_pages[0];
- pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi,
- memory, 1);
+ pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory);
if (pages_new < pages_old) {
RTE_LOG(DEBUG, EAL,
"%d not %d hugepages of size %u MB allocated\n",
@@ -1091,7 +1448,8 @@ rte_eal_hugepage_init(void)
continue;
}
- if (phys_addrs_available) {
+ if (phys_addrs_available &&
+ rte_eal_iova_mode() != RTE_IOVA_VA) {
/* find physical addresses for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
@@ -1118,18 +1476,6 @@ rte_eal_hugepage_init(void)
qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
sizeof(struct hugepage_file), cmp_physaddr);
- /* remap all hugepages */
- if (map_all_hugepages(&tmp_hp[hp_offset], hpi, NULL, 0) !=
- hpi->num_pages[0]) {
- RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
- }
-
- /* unmap original mappings */
- if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)
- goto fail;
-
/* we have processed a num of hugepages of this size, so inc offset */
hp_offset += hpi->num_pages[0];
}
@@ -1191,7 +1537,7 @@ rte_eal_hugepage_init(void)
}
/* create shared memory */
- hugepage = create_shared_memory(eal_hugepage_info_path(),
+ hugepage = create_shared_memory(eal_hugepage_data_path(),
nr_hugefiles * sizeof(struct hugepage_file));
if (hugepage == NULL) {
@@ -1212,7 +1558,7 @@ rte_eal_hugepage_init(void)
/*
* copy stuff from malloc'd hugepage* to the actual shared memory.
- * this procedure only copies those hugepages that have final_va
+ * this procedure only copies those hugepages that have orig_va
* not NULL. has overflow protection.
*/
if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
@@ -1221,6 +1567,23 @@ rte_eal_hugepage_init(void)
goto fail;
}
+#ifndef RTE_ARCH_64
+ /* for legacy 32-bit mode, we did not preallocate VA space, so do it */
+ if (internal_config.legacy_mem &&
+ prealloc_segments(hugepage, nr_hugefiles)) {
+ RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n");
+ goto fail;
+ }
+#endif
+
+ /* remap all pages we do need into memseg list VA space, so that those
+ * pages become first-class citizens in DPDK memory subsystem
+ */
+ if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
+ RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
+ goto fail;
+ }
+
/* free the hugepage backing files */
if (internal_config.hugepage_unlink &&
unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) {
@@ -1232,75 +1595,30 @@ rte_eal_hugepage_init(void)
free(tmp_hp);
tmp_hp = NULL;
- /* first memseg index shall be 0 after incrementing it below */
- j = -1;
- for (i = 0; i < nr_hugefiles; i++) {
- new_memseg = 0;
-
- /* if this is a new section, create a new memseg */
- if (i == 0)
- new_memseg = 1;
- else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
- new_memseg = 1;
- else if (hugepage[i].size != hugepage[i-1].size)
- new_memseg = 1;
-
-#ifdef RTE_ARCH_PPC_64
- /* On PPC64 architecture, the mmap always start from higher
- * virtual address to lower address. Here, both the physical
- * address and virtual address are in descending order */
- else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) !=
- hugepage[i].size)
- new_memseg = 1;
- else if (((unsigned long)hugepage[i-1].final_va -
- (unsigned long)hugepage[i].final_va) != hugepage[i].size)
- new_memseg = 1;
-#else
- else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
- hugepage[i].size)
- new_memseg = 1;
- else if (((unsigned long)hugepage[i].final_va -
- (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
- new_memseg = 1;
-#endif
+ munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
- if (new_memseg) {
- j += 1;
- if (j == RTE_MAX_MEMSEG)
- break;
+ /* we're not going to allocate more pages, so release VA space for
+ * unused memseg lists
+ */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ size_t mem_sz;
- mcfg->memseg[j].iova = hugepage[i].physaddr;
- mcfg->memseg[j].addr = hugepage[i].final_va;
- mcfg->memseg[j].len = hugepage[i].size;
- mcfg->memseg[j].socket_id = hugepage[i].socket_id;
- mcfg->memseg[j].hugepage_sz = hugepage[i].size;
- }
- /* continuation of previous memseg */
- else {
-#ifdef RTE_ARCH_PPC_64
- /* Use the phy and virt address of the last page as segment
- * address for IBM Power architecture */
- mcfg->memseg[j].iova = hugepage[i].physaddr;
- mcfg->memseg[j].addr = hugepage[i].final_va;
-#endif
- mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
- }
- hugepage[i].memseg_id = j;
- }
+ /* skip inactive lists */
+ if (msl->base_va == NULL)
+ continue;
+ /* skip lists where there is at least one page allocated */
+ if (msl->memseg_arr.count > 0)
+ continue;
+ /* this is an unused list, deallocate it */
+ mem_sz = (size_t)msl->page_sz * msl->memseg_arr.len;
+ munmap(msl->base_va, mem_sz);
+ msl->base_va = NULL;
- if (i < nr_hugefiles) {
- RTE_LOG(ERR, EAL, "Can only reserve %d pages "
- "from %d requested\n"
- "Current %s=%d is not enough\n"
- "Please either increase it or request less amount "
- "of memory.\n",
- i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
- RTE_MAX_MEMSEG);
- goto fail;
+ /* destroy backing fbarray */
+ rte_fbarray_destroy(&msl->memseg_arr);
}
- munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
-
return 0;
fail:
@@ -1312,6 +1630,125 @@ fail:
return -1;
}
+static int __rte_unused
+hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct hugepage_info *hpi = arg;
+
+ if (msl->page_sz != hpi->hugepage_sz)
+ return 0;
+
+ hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
+ return 0;
+}
+
+static int
+limits_callback(int socket_id, size_t cur_limit, size_t new_len)
+{
+ RTE_SET_USED(socket_id);
+ RTE_SET_USED(cur_limit);
+ RTE_SET_USED(new_len);
+ return -1;
+}
+
+static int
+eal_hugepage_init(void)
+{
+ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+ uint64_t memory[RTE_MAX_NUMA_NODES];
+ int hp_sz_idx, socket_id;
+
+ test_phys_addrs_available();
+
+ memset(used_hp, 0, sizeof(used_hp));
+
+ for (hp_sz_idx = 0;
+ hp_sz_idx < (int) internal_config.num_hugepage_sizes;
+ hp_sz_idx++) {
+#ifndef RTE_ARCH_64
+ struct hugepage_info dummy;
+ unsigned int i;
+#endif
+	/* also initialize hugepage sizes in used_hp */
+ struct hugepage_info *hpi;
+ hpi = &internal_config.hugepage_info[hp_sz_idx];
+ used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit, limit number of pages on socket to whatever we've
+ * preallocated, as we cannot allocate more.
+ */
+ memset(&dummy, 0, sizeof(dummy));
+ dummy.hugepage_sz = hpi->hugepage_sz;
+ if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
+ return -1;
+
+ for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
+ hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
+ dummy.num_pages[i]);
+ }
+#endif
+ }
+
+ /* make a copy of socket_mem, needed for balanced allocation. */
+ for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
+ memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
+
+ /* calculate final number of pages */
+ if (calc_num_pages_per_socket(memory,
+ internal_config.hugepage_info, used_hp,
+ internal_config.num_hugepage_sizes) < 0)
+ return -1;
+
+ for (hp_sz_idx = 0;
+ hp_sz_idx < (int)internal_config.num_hugepage_sizes;
+ hp_sz_idx++) {
+ for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
+ socket_id++) {
+ struct rte_memseg **pages;
+ struct hugepage_info *hpi = &used_hp[hp_sz_idx];
+ unsigned int num_pages = hpi->num_pages[socket_id];
+ int num_pages_alloc, i;
+
+ if (num_pages == 0)
+ continue;
+
+			pages = malloc(sizeof(*pages) * num_pages);
+			if (pages == NULL)
+				return -1;
+
+ RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n",
+ num_pages, hpi->hugepage_sz >> 20, socket_id);
+
+ num_pages_alloc = eal_memalloc_alloc_seg_bulk(pages,
+ num_pages, hpi->hugepage_sz,
+ socket_id, true);
+ if (num_pages_alloc < 0) {
+ free(pages);
+ return -1;
+ }
+
+ /* mark preallocated pages as unfreeable */
+ for (i = 0; i < num_pages_alloc; i++) {
+ struct rte_memseg *ms = pages[i];
+ ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
+ }
+ free(pages);
+ }
+ }
+ /* if socket limits were specified, set them */
+ if (internal_config.force_socket_limits) {
+ unsigned int i;
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ uint64_t limit = internal_config.socket_limit[i];
+ if (limit == 0)
+ continue;
+ if (rte_mem_alloc_validator_register("socket-limit",
+ limits_callback, i, limit))
+ RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
+ }
+ }
+ return 0;
+}
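For illustration only (the numbers are assumed, not taken from this change): registering a 1 GB cap on socket 0 - roughly what the --socket-limit option requests - boils down to a single call using the "socket-limit" validator shown above:

	rte_mem_alloc_validator_register("socket-limit", limits_callback,
			0 /* socket_id */, (size_t)1024 << 20 /* 1 GB cap */);

Any later allocation that would push socket 0 past that limit invokes limits_callback(), whose unconditional -1 return value is what makes the allocator reject the request.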
+
/*
* uses fstat to report the size of a file on disk
*/
@@ -1330,16 +1767,15 @@ getFileSize(int fd)
* configuration and finds the hugepages which form that segment, mapping them
* in order to form a contiguous block in the virtual memory space
*/
-int
-rte_eal_hugepage_attach(void)
+static int
+eal_legacy_hugepage_attach(void)
{
- const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct hugepage_file *hp = NULL;
- unsigned num_hp = 0;
- unsigned i, s = 0; /* s used to track the segment number */
- unsigned max_seg = RTE_MAX_MEMSEG;
+ unsigned int num_hp = 0;
+ unsigned int i = 0;
+ unsigned int cur_seg;
off_t size = 0;
- int fd, fd_zero = -1, fd_hugepage = -1;
+ int fd, fd_hugepage = -1;
if (aslr_enabled() > 0) {
RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
@@ -1350,139 +1786,429 @@ rte_eal_hugepage_attach(void)
test_phys_addrs_available();
- fd_zero = open("/dev/zero", O_RDONLY);
- if (fd_zero < 0) {
- RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
- goto error;
- }
- fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
+ fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
if (fd_hugepage < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ eal_hugepage_data_path());
goto error;
}
- /* map all segments into memory to make sure we get the addrs */
- for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
- void *base_addr;
-
- /*
- * the first memory segment with len==0 is the one that
- * follows the last valid segment.
- */
- if (mcfg->memseg[s].len == 0)
- break;
-
- /*
- * fdzero is mmapped to get a contiguous block of virtual
- * addresses of the appropriate memseg size.
- * use mmap to get identical addresses as the primary process.
- */
- base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
- PROT_READ,
-#ifdef RTE_ARCH_PPC_64
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
- MAP_PRIVATE,
-#endif
- fd_zero, 0);
- if (base_addr == MAP_FAILED ||
- base_addr != mcfg->memseg[s].addr) {
- max_seg = s;
- if (base_addr != MAP_FAILED) {
- /* errno is stale, don't use */
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero at [%p], got [%p] - "
- "please use '--base-virtaddr' option\n",
- (unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr, base_addr);
- munmap(base_addr, mcfg->memseg[s].len);
- } else {
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero at [%p]: '%s'\n",
- (unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr, strerror(errno));
- }
- if (aslr_enabled() > 0) {
- RTE_LOG(ERR, EAL, "It is recommended to "
- "disable ASLR in the kernel "
- "and retry running both primary "
- "and secondary processes\n");
- }
- goto error;
- }
- }
-
size = getFileSize(fd_hugepage);
hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
if (hp == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n",
+ eal_hugepage_data_path());
goto error;
}
num_hp = size / sizeof(struct hugepage_file);
RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
- s = 0;
- while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
- void *addr, *base_addr;
- uintptr_t offset = 0;
- size_t mapping_size;
- /*
- * free previously mapped memory so we can map the
- * hugepages into the space
- */
- base_addr = mcfg->memseg[s].addr;
- munmap(base_addr, mcfg->memseg[s].len);
-
- /* find the hugepages for this segment and map them
- * we don't need to worry about order, as the server sorted the
- * entries before it did the second mmap of them */
- for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
- if (hp[i].memseg_id == (int)s){
- fd = open(hp[i].filepath, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n",
- hp[i].filepath);
- goto error;
- }
- mapping_size = hp[i].size;
- addr = mmap(RTE_PTR_ADD(base_addr, offset),
- mapping_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
- close(fd); /* close file both on success and on failure */
- if (addr == MAP_FAILED ||
- addr != RTE_PTR_ADD(base_addr, offset)) {
- RTE_LOG(ERR, EAL, "Could not mmap %s\n",
- hp[i].filepath);
- goto error;
- }
- offset+=mapping_size;
- }
+ /* map all segments into memory to make sure we get the addrs. the
+ * segments themselves are already in memseg list (which is shared and
+ * has its VA space already preallocated), so we just need to map
+ * everything into correct addresses.
+ */
+ for (i = 0; i < num_hp; i++) {
+ struct hugepage_file *hf = &hp[i];
+ size_t map_sz = hf->size;
+ void *map_addr = hf->final_va;
+
+ /* if size is zero, no more pages left */
+ if (map_sz == 0)
+ break;
+
+ fd = open(hf->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+ hf->filepath, strerror(errno));
+ goto error;
+ }
+
+ map_addr = mmap(map_addr, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not map %s: %s\n",
+ hf->filepath, strerror(errno));
+ close(fd);
+ goto error;
+ }
+
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ goto error;
}
- RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
- (unsigned long long)mcfg->memseg[s].len);
- s++;
+
+ close(fd);
}
/* unmap the hugepage config file, since we are done using it */
munmap(hp, size);
- close(fd_zero);
close(fd_hugepage);
return 0;
error:
- for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++)
- munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len);
+	/* unmap whatever we managed to map so far */
+	for (cur_seg = 0; cur_seg < i; cur_seg++) {
+		struct hugepage_file *hf = &hp[cur_seg];
+ size_t map_sz = hf->size;
+ void *map_addr = hf->final_va;
+
+ munmap(map_addr, map_sz);
+ }
if (hp != NULL && hp != MAP_FAILED)
munmap(hp, size);
- if (fd_zero >= 0)
- close(fd_zero);
if (fd_hugepage >= 0)
close(fd_hugepage);
return -1;
}
+static int
+eal_hugepage_attach(void)
+{
+ if (eal_memalloc_sync_with_primary()) {
+ RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
+ if (aslr_enabled() > 0)
+ RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel and retry running both primary and secondary processes\n");
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+ return internal_config.legacy_mem ?
+ eal_legacy_hugepage_init() :
+ eal_hugepage_init();
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+ return internal_config.legacy_mem ?
+ eal_legacy_hugepage_attach() :
+ eal_hugepage_attach();
+}
+
int
rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;
}
+
+static int __rte_unused
+memseg_primary_init_32(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int active_sockets, hpi_idx, msl_idx = 0;
+ unsigned int socket_id, i;
+ struct rte_memseg_list *msl;
+ uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
+ uint64_t max_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* this is a giant hack, but desperate times call for desperate
+ * measures. in legacy 32-bit mode, we cannot preallocate VA space,
+ * because having upwards of 2 gigabytes of VA space already mapped will
+ * interfere with our ability to map and sort hugepages.
+ *
+ * therefore, in legacy 32-bit mode, we will be initializing memseg
+ * lists much later - in eal_memory.c, right after we unmap all the
+ * unneeded pages. this will not affect secondary processes, as those
+ * should be able to mmap the space without (too many) problems.
+ */
+ if (internal_config.legacy_mem)
+ return 0;
+
+ /* 32-bit mode is a very special case. we cannot know in advance where
+ * the user will want to allocate their memory, so we have to do some
+ * heuristics.
+ */
+ active_sockets = 0;
+ total_requested_mem = 0;
+ if (internal_config.force_sockets)
+ for (i = 0; i < rte_socket_count(); i++) {
+ uint64_t mem;
+
+ socket_id = rte_socket_id_by_idx(i);
+ mem = internal_config.socket_mem[socket_id];
+
+ if (mem == 0)
+ continue;
+
+ active_sockets++;
+ total_requested_mem += mem;
+ }
+ else
+ total_requested_mem = internal_config.memory;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ if (total_requested_mem > max_mem) {
+ RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
+ (unsigned int)(max_mem >> 20));
+ return -1;
+ }
+ total_extra_mem = max_mem - total_requested_mem;
+ extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
+ total_extra_mem / active_sockets;
+
+ /* the allocation logic is a little bit convoluted, but here's how it
+ * works, in a nutshell:
+ * - if user hasn't specified on which sockets to allocate memory via
+ * --socket-mem, we allocate all of our memory on master core socket.
+ * - if user has specified sockets to allocate memory on, there may be
+ * some "unused" memory left (e.g. if user has specified --socket-mem
+ * such that not all memory adds up to 2 gigabytes), so add it to all
+ * sockets that are in use equally.
+ *
+ * page sizes are sorted by size in descending order, so we can safely
+ * assume that we dispense with bigger page sizes first.
+ */
+
+ /* create memseg lists */
+ for (i = 0; i < rte_socket_count(); i++) {
+ int hp_sizes = (int) internal_config.num_hugepage_sizes;
+ uint64_t max_socket_mem, cur_socket_mem;
+ unsigned int master_lcore_socket;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ bool skip;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ /* if we didn't specifically request memory on this socket */
+ skip = active_sockets != 0 &&
+ internal_config.socket_mem[socket_id] == 0;
+ /* ...or if we didn't specifically request memory on *any*
+ * socket, and this is not master lcore
+ */
+ master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
+ skip |= active_sockets == 0 && socket_id != master_lcore_socket;
+
+ if (skip) {
+ RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
+ socket_id);
+ continue;
+ }
+
+ /* max amount of memory on this socket */
+ max_socket_mem = (active_sockets != 0 ?
+ internal_config.socket_mem[socket_id] :
+ internal_config.memory) +
+ extra_mem_per_socket;
+ cur_socket_mem = 0;
+
+ for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
+ uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
+ uint64_t hugepage_sz;
+ struct hugepage_info *hpi;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* check if pages are actually available */
+ if (hpi->num_pages[socket_id] == 0)
+ continue;
+
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+ max_pagesz_mem = max_socket_mem - cur_socket_mem;
+
+ /* make it multiple of page size */
+ max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
+ hugepage_sz);
+
+ RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
+ "%" PRIu64 "M on socket %i\n",
+ max_pagesz_mem >> 20, socket_id);
+
+ type_msl_idx = 0;
+ while (cur_pagesz_mem < max_pagesz_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ max_pagesz_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx)) {
+ /* failing to allocate a memseg list is
+ * a serious error.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ if (alloc_va_space(msl)) {
+ /* if we couldn't allocate VA space, we
+ * can try with smaller page sizes.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
+ /* deallocate memseg list */
+ if (free_memseg_list(msl))
+ return -1;
+ break;
+ }
+
+ total_segs += msl->memseg_arr.len;
+ cur_pagesz_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+ msl_idx++;
+ }
+ cur_socket_mem += cur_pagesz_mem;
+ }
+ if (cur_socket_mem == 0) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
+ socket_id);
+ return -1;
+ }
+ }
+
+ return 0;
+}
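To make the 32-bit heuristic above concrete, a worked example with assumed numbers (a 2048 MB overall cap is used purely for illustration) for --socket-mem=512,512 with two active sockets:

	max_mem              = 2048 MB
	total_requested_mem  =  512 + 512  = 1024 MB
	total_extra_mem      = 2048 - 1024 = 1024 MB
	extra_mem_per_socket = 1024 / 2    =  512 MB
	max_socket_mem       =  512 + 512  = 1024 MB of VA preallocated per socket

In other words, each active socket gets VA space for its explicit request plus an equal share of whatever is left under the 32-bit cap.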
+
+static int __rte_unused
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, socket_id, hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ if (total_mem >= max_mem)
+ break;
+
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+#ifndef RTE_ARCH_64
+ memseg_primary_init_32() :
+#else
+ memseg_primary_init() :
+#endif
+ memseg_secondary_init();
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 08e150b7..b496fc71 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
if (eal_thread_set_affinity() < 0)
rte_panic("cannot set affinity\n");
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
@@ -176,7 +176,7 @@ int rte_sys_gettid(void)
int rte_thread_setname(pthread_t id, const char *name)
{
- int ret = -1;
+ int ret = ENOSYS;
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 12)
ret = pthread_setname_np(id, name);
@@ -184,5 +184,5 @@ int rte_thread_setname(pthread_t id, const char *name)
#endif
RTE_SET_USED(id);
RTE_SET_USED(name);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
index 161322f2..2766bd78 100644
--- a/lib/librte_eal/linuxapp/eal/eal_timer.c
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -137,7 +137,6 @@ int
rte_eal_hpet_init(int make_default)
{
int fd, ret;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
if (internal_config.no_hpet) {
RTE_LOG(NOTICE, EAL, "HPET is disabled\n");
@@ -178,7 +177,7 @@ rte_eal_hpet_init(int make_default)
/* create a thread that will increment a global variable for
* msb (hpet is 32 bits by default under linux) */
- ret = pthread_create(&msb_inc_thread_id, NULL,
+ ret = rte_ctrl_thread_create(&msb_inc_thread_id, "hpet-msb-inc", NULL,
(void *(*)(void *))hpet_msb_inc, NULL);
if (ret != 0) {
RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
@@ -186,15 +185,6 @@ rte_eal_hpet_init(int make_default)
return -1;
}
- /*
- * Set thread_name for aid in debugging.
- */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc");
- ret = rte_thread_setname(msb_inc_thread_id, thread_name);
- if (ret != 0)
- RTE_LOG(DEBUG, EAL,
- "Cannot set HPET timer thread name!\n");
-
if (make_default)
eal_timer_source = EAL_TIMER_HPET;
return 0;
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index e44ae4d0..c68dc38e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1,12 +1,14 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <inttypes.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
@@ -18,59 +20,258 @@
#ifdef VFIO_PRESENT
+#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
+
+/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
+ * recreate the mappings for DPDK segments, but we cannot do so for memory that
+ * was registered by the user themselves, so we need to store the user mappings
+ * somewhere, to recreate them later.
+ */
+#define VFIO_MAX_USER_MEM_MAPS 256
+struct user_mem_map {
+ uint64_t addr;
+ uint64_t iova;
+ uint64_t len;
+};
+
+struct user_mem_maps {
+ rte_spinlock_recursive_t lock;
+ int n_maps;
+ struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
+};
+
+struct vfio_config {
+ int vfio_enabled;
+ int vfio_container_fd;
+ int vfio_active_groups;
+ const struct vfio_iommu_type *vfio_iommu_type;
+ struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+ struct user_mem_maps mem_maps;
+};
+
/* per-process VFIO config */
-static struct vfio_config vfio_cfg;
+static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS];
+static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0];
static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_spapr_dma_map(int);
+static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
+static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr,
+ uint64_t iova, uint64_t len, int do_map);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
- { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ {
+ .type_id = RTE_VFIO_TYPE1,
+ .name = "Type 1",
+ .dma_map_func = &vfio_type1_dma_map,
+ .dma_user_map_func = &vfio_type1_dma_mem_map
+ },
/* ppc64 IOMMU, otherwise known as spapr */
- { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+ {
+ .type_id = RTE_VFIO_SPAPR,
+ .name = "sPAPR",
+ .dma_map_func = &vfio_spapr_dma_map,
+ .dma_user_map_func = &vfio_spapr_dma_mem_map
+ },
/* IOMMU-less mode */
- { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+ {
+ .type_id = RTE_VFIO_NOIOMMU,
+ .name = "No-IOMMU",
+ .dma_map_func = &vfio_noiommu_dma_map,
+ .dma_user_map_func = &vfio_noiommu_dma_mem_map
+ },
};
-int
-vfio_get_group_fd(int iommu_group_no)
+static int
+is_null_map(const struct user_mem_map *map)
{
- int i;
- int vfio_group_fd;
- char filename[PATH_MAX];
- struct vfio_group *cur_grp;
+ return map->addr == 0 && map->iova == 0 && map->len == 0;
+}
- /* check if we already have the group descriptor open */
- for (i = 0; i < VFIO_MAX_GROUPS; i++)
- if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
- return vfio_cfg.vfio_groups[i].fd;
+/* we may need to merge user mem maps together in case of user mapping/unmapping
+ * chunks of memory, so we'll need a comparator function to sort segments.
+ */
+static int
+user_mem_map_cmp(const void *a, const void *b)
+{
+ const struct user_mem_map *umm_a = a;
+ const struct user_mem_map *umm_b = b;
- /* Lets see first if there is room for a new group */
- if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) {
- RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ /* move null entries to end */
+ if (is_null_map(umm_a))
+ return 1;
+ if (is_null_map(umm_b))
return -1;
- }
- /* Now lets get an index for the new group */
- for (i = 0; i < VFIO_MAX_GROUPS; i++)
- if (vfio_cfg.vfio_groups[i].group_no == -1) {
- cur_grp = &vfio_cfg.vfio_groups[i];
- break;
- }
+ /* sort by iova first */
+ if (umm_a->iova < umm_b->iova)
+ return -1;
+ if (umm_a->iova > umm_b->iova)
+ return 1;
- /* This should not happen */
- if (i == VFIO_MAX_GROUPS) {
- RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ if (umm_a->addr < umm_b->addr)
return -1;
+ if (umm_a->addr > umm_b->addr)
+ return 1;
+
+ if (umm_a->len < umm_b->len)
+ return -1;
+ if (umm_a->len > umm_b->len)
+ return 1;
+
+ return 0;
+}
+
+/* adjust user map entry. this may result in shortening of existing map, or in
+ * splitting existing map in two pieces.
+ */
+static void
+adjust_map(struct user_mem_map *src, struct user_mem_map *end,
+ uint64_t remove_va_start, uint64_t remove_len)
+{
+ /* if va start is same as start address, we're simply moving start */
+ if (remove_va_start == src->addr) {
+ src->addr += remove_len;
+ src->iova += remove_len;
+ src->len -= remove_len;
+ } else if (remove_va_start + remove_len == src->addr + src->len) {
+ /* we're shrinking mapping from the end */
+ src->len -= remove_len;
+ } else {
+ /* we're blowing a hole in the middle */
+ struct user_mem_map tmp;
+ uint64_t total_len = src->len;
+
+ /* adjust source segment length */
+ src->len = remove_va_start - src->addr;
+
+ /* create temporary segment in the middle */
+ tmp.addr = src->addr + src->len;
+ tmp.iova = src->iova + src->len;
+ tmp.len = remove_len;
+
+ /* populate end segment - this one we will be keeping */
+ end->addr = tmp.addr + tmp.len;
+ end->iova = tmp.iova + tmp.len;
+ end->len = total_len - src->len - tmp.len;
+ }
+}
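A worked example of the hole-punching branch above (addresses are illustrative and assume a 1:1 VA:IOVA layout): removing 1 MB from the middle of a 3 MB entry leaves two 1 MB entries.

	struct user_mem_map src = { .addr = 0x100000, .iova = 0x100000, .len = 0x300000 };
	struct user_mem_map end;

	adjust_map(&src, &end, 0x200000 /* remove_va_start */, 0x100000 /* remove_len */);
	/* src = { addr 0x100000, iova 0x100000, len 0x100000 }
	 * end = { addr 0x300000, iova 0x300000, len 0x100000 }
	 * i.e. the removed middle region is gone and the remainder is
	 * kept as two separate maps.
	 */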
+
+/* try merging two maps into one, return 1 if succeeded */
+static int
+merge_map(struct user_mem_map *left, struct user_mem_map *right)
+{
+ if (left->addr + left->len != right->addr)
+ return 0;
+ if (left->iova + left->len != right->iova)
+ return 0;
+
+ left->len += right->len;
+
+ memset(right, 0, sizeof(*right));
+
+ return 1;
+}
+
+static struct user_mem_map *
+find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
+ uint64_t iova, uint64_t len)
+{
+ uint64_t va_end = addr + len;
+ uint64_t iova_end = iova + len;
+ int i;
+
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map = &user_mem_maps->maps[i];
+ uint64_t map_va_end = map->addr + map->len;
+ uint64_t map_iova_end = map->iova + map->len;
+
+ /* check start VA */
+ if (addr < map->addr || addr >= map_va_end)
+ continue;
+ /* check if VA end is within boundaries */
+ if (va_end <= map->addr || va_end > map_va_end)
+ continue;
+
+ /* check start IOVA */
+ if (iova < map->iova || iova >= map_iova_end)
+ continue;
+ /* check if IOVA end is within boundaries */
+ if (iova_end <= map->iova || iova_end > map_iova_end)
+ continue;
+
+ /* we've found our map */
+ return map;
}
+ return NULL;
+}
+
+/* this will sort all user maps, and merge/compact any adjacent maps */
+static void
+compact_user_maps(struct user_mem_maps *user_mem_maps)
+{
+ int i, n_merged, cur_idx;
+
+ qsort(user_mem_maps->maps, user_mem_maps->n_maps,
+ sizeof(user_mem_maps->maps[0]), user_mem_map_cmp);
+
+ /* we'll go over the list backwards when merging */
+ n_merged = 0;
+ for (i = user_mem_maps->n_maps - 2; i >= 0; i--) {
+ struct user_mem_map *l, *r;
+
+ l = &user_mem_maps->maps[i];
+ r = &user_mem_maps->maps[i + 1];
+
+ if (is_null_map(l) || is_null_map(r))
+ continue;
+
+ if (merge_map(l, r))
+ n_merged++;
+ }
+
+ /* the entries are still sorted, but now they have holes in them, so
+ * walk through the list and remove the holes
+ */
+ if (n_merged > 0) {
+ cur_idx = 0;
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ if (!is_null_map(&user_mem_maps->maps[i])) {
+ struct user_mem_map *src, *dst;
+
+ src = &user_mem_maps->maps[i];
+ dst = &user_mem_maps->maps[cur_idx++];
+
+ if (src != dst) {
+ memcpy(dst, src, sizeof(*src));
+ memset(src, 0, sizeof(*src));
+ }
+ }
+ }
+ user_mem_maps->n_maps = cur_idx;
+ }
+}
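A sketch of the opposite case handled by merge_map() and compact_user_maps(): two VA- and IOVA-contiguous user maps collapse into one entry (values are illustrative).

	struct user_mem_map a = { .addr = 0x100000, .iova = 0x100000, .len = 0x200000 };
	struct user_mem_map b = { .addr = 0x300000, .iova = 0x300000, .len = 0x100000 };

	merge_map(&a, &b);
	/* a = { addr 0x100000, iova 0x100000, len 0x300000 }; b is zeroed.
	 * The zeroed (null) entry is then skipped by the compaction loop
	 * in compact_user_maps() and n_maps shrinks accordingly.
	 */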
+
+static int
+vfio_open_group_fd(int iommu_group_num)
+{
+ int vfio_group_fd;
+ char filename[PATH_MAX];
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
/* if primary, try to open the group */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
/* try regular group format */
snprintf(filename, sizeof(filename),
- VFIO_GROUP_FMT, iommu_group_no);
+ VFIO_GROUP_FMT, iommu_group_num);
vfio_group_fd = open(filename, O_RDWR);
if (vfio_group_fd < 0) {
/* if file not found, it's not an error */
@@ -82,7 +283,8 @@ vfio_get_group_fd(int iommu_group_no)
/* special case: try no-IOMMU path as well */
snprintf(filename, sizeof(filename),
- VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+ VFIO_NOIOMMU_GROUP_FMT,
+ iommu_group_num);
vfio_group_fd = open(filename, O_RDWR);
if (vfio_group_fd < 0) {
if (errno != ENOENT) {
@@ -95,178 +297,285 @@ vfio_get_group_fd(int iommu_group_no)
/* noiommu group found */
}
- cur_grp->group_no = iommu_group_no;
- cur_grp->fd = vfio_group_fd;
- vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* if we're in a secondary process, request group fd from the primary
- * process via our socket
+ * process via mp channel.
*/
- else {
- int socket_fd, ret;
+ p->req = SOCKET_REQ_GROUP;
+ p->group_num = iommu_group_num;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ vfio_group_fd = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+ vfio_group_fd = mp_rep->fds[0];
+ } else if (p->result == SOCKET_NO_FD) {
+ RTE_LOG(ERR, EAL, " bad VFIO group fd\n");
+ vfio_group_fd = 0;
+ }
+ free(mp_reply.msgs);
+ }
- socket_fd = vfio_mp_sync_connect_to_primary();
+ if (vfio_group_fd < 0)
+ RTE_LOG(ERR, EAL, " cannot request group fd\n");
+ return vfio_group_fd;
+}
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
- RTE_LOG(ERR, EAL, " cannot send group number!\n");
- close(socket_fd);
- return -1;
- }
- ret = vfio_mp_sync_receive_request(socket_fd);
- switch (ret) {
- case SOCKET_NO_FD:
- close(socket_fd);
- return 0;
- case SOCKET_OK:
- vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, store it and return it */
- if (vfio_group_fd > 0) {
- close(socket_fd);
- cur_grp->group_no = iommu_group_no;
- cur_grp->fd = vfio_group_fd;
- vfio_cfg.vfio_active_groups++;
- return vfio_group_fd;
- }
- /* fall-through on error */
- default:
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
+static struct vfio_config *
+get_vfio_cfg_by_group_num(int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+ if (vfio_cfg->vfio_groups[j].group_num ==
+ iommu_group_num)
+ return vfio_cfg;
}
}
- return -1;
+
+ return NULL;
}
+static struct vfio_config *
+get_vfio_cfg_by_group_fd(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++)
+ if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+ return vfio_cfg;
+ }
-static int
-get_vfio_group_idx(int vfio_group_fd)
+ return NULL;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_container_fd(int container_fd)
+{
+ int i;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ if (vfio_cfgs[i].vfio_container_fd == container_fd)
+ return &vfio_cfgs[i];
+ }
+
+ return NULL;
+}
+
+int
+rte_vfio_get_group_fd(int iommu_group_num)
{
int i;
+ int vfio_group_fd;
+ struct vfio_group *cur_grp;
+ struct vfio_config *vfio_cfg;
+
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+
+ /* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
- if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd)
- return i;
+ if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num)
+ return vfio_cfg->vfio_groups[i].fd;
+
+ /* Lets see first if there is room for a new group */
+ if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ return -1;
+ }
+
+ /* Now lets get an index for the new group */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num == -1) {
+ cur_grp = &vfio_cfg->vfio_groups[i];
+ break;
+ }
+
+ /* This should not happen */
+ if (i == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ return -1;
+ }
+
+ vfio_group_fd = vfio_open_group_fd(iommu_group_num);
+ if (vfio_group_fd < 0) {
+ RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
+ return -1;
+ }
+
+ cur_grp->group_num = iommu_group_num;
+ cur_grp->fd = vfio_group_fd;
+ vfio_cfg->vfio_active_groups++;
+
+ return vfio_group_fd;
+}
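A minimal usage sketch of the now-public group lookup (the sysfs base and PCI address below are assumptions for illustration, not taken from this change):

	int iommu_group_num, vfio_group_fd;

	if (rte_vfio_get_group_num("/sys/bus/pci/devices", "0000:81:00.0",
			&iommu_group_num) <= 0)
		return -1;	/* device not bound to a VFIO driver, or error */

	vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
	if (vfio_group_fd < 0)
		return -1;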
+
+static int
+get_vfio_group_idx(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++)
+ if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+ return j;
+ }
+
return -1;
}
static void
vfio_group_device_get(int vfio_group_fd)
{
+ struct vfio_config *vfio_cfg;
int i;
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return;
+ }
+
i = get_vfio_group_idx(vfio_group_fd);
if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
else
- vfio_cfg.vfio_groups[i].devices++;
+ vfio_cfg->vfio_groups[i].devices++;
}
static void
vfio_group_device_put(int vfio_group_fd)
{
+ struct vfio_config *vfio_cfg;
int i;
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return;
+ }
+
i = get_vfio_group_idx(vfio_group_fd);
if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
else
- vfio_cfg.vfio_groups[i].devices--;
+ vfio_cfg->vfio_groups[i].devices--;
}
static int
vfio_group_device_count(int vfio_group_fd)
{
+ struct vfio_config *vfio_cfg;
int i;
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return -1;
+ }
+
i = get_vfio_group_idx(vfio_group_fd);
if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {
RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
return -1;
}
- return vfio_cfg.vfio_groups[i].devices;
+ return vfio_cfg->vfio_groups[i].devices;
}
-int
-rte_vfio_clear_group(int vfio_group_fd)
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
+ void *arg __rte_unused)
{
- int i;
- int socket_fd, ret;
-
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
-
- i = get_vfio_group_idx(vfio_group_fd);
- if (i < 0)
- return -1;
- vfio_cfg.vfio_groups[i].group_no = -1;
- vfio_cfg.vfio_groups[i].fd = -1;
- vfio_cfg.vfio_groups[i].devices = 0;
- vfio_cfg.vfio_active_groups--;
- return 0;
+ struct rte_memseg_list *msl;
+ struct rte_memseg *ms;
+ size_t cur_len = 0;
+
+ msl = rte_mem_virt2memseg_list(addr);
+
+	/* for IOVA as VA mode, no need to care about IOVA addresses */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+ uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+ len, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+ len, 0);
+ return;
}
- /* This is just for SECONDARY processes */
- socket_fd = vfio_mp_sync_connect_to_primary();
+ /* memsegs are contiguous in memory */
+ ms = rte_mem_virt2memseg(addr, msl);
+ while (cur_len < len) {
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
+ ms->iova, ms->len, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
+ ms->iova, ms->len, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
+ cur_len += ms->len;
+ ++ms;
}
+}
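The callback above is registered through the generic memory event API further down. A hedged sketch of a hypothetical consumer using the same signature (my_mem_event_cb and its name string are made up for illustration):

	static void
	my_mem_event_cb(enum rte_mem_event type, const void *addr, size_t len,
			void *arg __rte_unused)
	{
		if (type == RTE_MEM_EVENT_ALLOC)
			RTE_LOG(DEBUG, EAL, "mem hotplug: +%zu bytes at %p\n",
					len, addr);
		else	/* RTE_MEM_EVENT_FREE */
			RTE_LOG(DEBUG, EAL, "mem hotplug: -%zu bytes at %p\n",
					len, addr);
	}

	/* registered once during init, e.g.: */
	rte_mem_event_callback_register("my-mem-event-cb", my_mem_event_cb, NULL);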
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
+int
+rte_vfio_clear_group(int vfio_group_fd)
+{
+ int i;
+ struct vfio_config *vfio_cfg;
+
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
return -1;
}
- if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
- RTE_LOG(ERR, EAL, " cannot send group fd!\n");
- close(socket_fd);
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
return -1;
- }
+ vfio_cfg->vfio_groups[i].group_num = -1;
+ vfio_cfg->vfio_groups[i].fd = -1;
+ vfio_cfg->vfio_groups[i].devices = 0;
+ vfio_cfg->vfio_active_groups--;
- ret = vfio_mp_sync_receive_request(socket_fd);
- switch (ret) {
- case SOCKET_NO_FD:
- RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
- close(socket_fd);
- break;
- case SOCKET_OK:
- close(socket_fd);
- return 0;
- case SOCKET_ERR:
- RTE_LOG(ERR, EAL, " Socket error\n");
- close(socket_fd);
- break;
- default:
- RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret);
- close(socket_fd);
- }
- return -1;
+ return 0;
}
int
rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
struct vfio_group_status group_status = {
.argsz = sizeof(group_status)
};
+ struct vfio_config *vfio_cfg;
+ struct user_mem_maps *user_mem_maps;
+ int vfio_container_fd;
int vfio_group_fd;
- int iommu_group_no;
- int ret;
+ int iommu_group_num;
+ int i, ret;
/* get group number */
- ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+ ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
if (ret == 0) {
RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
dev_addr);
@@ -278,7 +587,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
return -1;
/* get the actual group fd */
- vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+ vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
if (vfio_group_fd < 0)
return -1;
@@ -309,12 +618,18 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
return -1;
}
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+ vfio_container_fd = vfio_cfg->vfio_container_fd;
+ user_mem_maps = &vfio_cfg->mem_maps;
+
/* check if group does not have a container yet */
if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
/* add group to a container */
ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
- &vfio_cfg.vfio_container_fd);
+ &vfio_container_fd);
if (ret) {
RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
@@ -332,10 +647,12 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
* functionality.
*/
if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_cfg.vfio_active_groups == 1) {
+ vfio_cfg->vfio_active_groups == 1 &&
+ vfio_group_device_count(vfio_group_fd) == 0) {
+ const struct vfio_iommu_type *t;
+
/* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
- vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ t = vfio_set_iommu_type(vfio_container_fd);
if (!t) {
RTE_LOG(ERR, EAL,
" %s failed to select IOMMU type\n",
@@ -344,15 +661,75 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
rte_vfio_clear_group(vfio_group_fd);
return -1;
}
- ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+ /* lock memory hotplug before mapping and release it
+ * after registering callback, to prevent races
+ */
+ rte_rwlock_read_lock(mem_lock);
+ if (vfio_cfg == default_vfio_cfg)
+ ret = t->dma_map_func(vfio_container_fd);
+ else
+ ret = 0;
if (ret) {
RTE_LOG(ERR, EAL,
" %s DMA remapping failed, error %i (%s)\n",
dev_addr, errno, strerror(errno));
close(vfio_group_fd);
rte_vfio_clear_group(vfio_group_fd);
+ rte_rwlock_read_unlock(mem_lock);
return -1;
}
+
+ vfio_cfg->vfio_iommu_type = t;
+
+ /* re-map all user-mapped segments */
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+ /* this IOMMU type may not support DMA mapping, but
+ * if we have mappings in the list - that means we have
+ * previously mapped something successfully, so we can
+ * be sure that DMA mapping is supported.
+ */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map;
+ map = &user_mem_maps->maps[i];
+
+ ret = t->dma_user_map_func(
+ vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: "
+ "va: 0x%" PRIx64 " "
+ "iova: 0x%" PRIx64 " "
+					"len: 0x%" PRIx64 "\n",
+ map->addr, map->iova,
+ map->len);
+ rte_spinlock_recursive_unlock(
+ &user_mem_maps->lock);
+ rte_rwlock_read_unlock(mem_lock);
+ return -1;
+ }
+ }
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+
+ /* register callback for mem events */
+ if (vfio_cfg == default_vfio_cfg)
+ ret = rte_mem_event_callback_register(
+ VFIO_MEM_EVENT_CLB_NAME,
+ vfio_mem_event_callback, NULL);
+ else
+ ret = 0;
+ /* unlock memory hotplug */
+ rte_rwlock_read_unlock(mem_lock);
+
+ if (ret && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n");
+ return -1;
+ }
+ if (ret)
+ RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n");
+ else
+ RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
}
}
@@ -390,30 +767,45 @@ int
rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
int vfio_dev_fd)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
struct vfio_group_status group_status = {
.argsz = sizeof(group_status)
};
+ struct vfio_config *vfio_cfg;
int vfio_group_fd;
- int iommu_group_no;
+ int iommu_group_num;
int ret;
+ /* we don't want any DMA mapping messages to come while we're detaching
+ * VFIO device, because this might be the last device and we might need
+ * to unregister the callback.
+ */
+ rte_rwlock_read_lock(mem_lock);
+
/* get group number */
- ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+ ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
if (ret <= 0) {
RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n",
dev_addr);
/* This is an error at this point. */
- return -1;
+ ret = -1;
+ goto out;
}
/* get the actual group fd */
- vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+ vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
if (vfio_group_fd <= 0) {
- RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
+ RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n",
dev_addr);
- return -1;
+ ret = -1;
+ goto out;
}
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+
/* At this point we got an active group. Closing it will make the
* container detachment. If this is the last active group, VFIO kernel
* code will unset the container and the IOMMU mappings.
@@ -423,7 +815,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
if (close(vfio_dev_fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
dev_addr);
- return -1;
+ ret = -1;
+ goto out;
}
/* An VFIO group can have several devices attached. Just when there is
@@ -435,30 +828,53 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
if (close(vfio_group_fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
dev_addr);
- return -1;
+ ret = -1;
+ goto out;
}
if (rte_vfio_clear_group(vfio_group_fd) < 0) {
RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
dev_addr);
- return -1;
+ ret = -1;
+ goto out;
}
}
- return 0;
+ /* if there are no active device groups, unregister the callback to
+ * avoid spurious attempts to map/unmap memory from VFIO.
+ */
+ if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0)
+ rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
+ NULL);
+
+ /* success */
+ ret = 0;
+
+out:
+ rte_rwlock_read_unlock(mem_lock);
+ return ret;
}
int
rte_vfio_enable(const char *modname)
{
/* initialize group list */
- int i;
+ int i, j;
int vfio_available;
- for (i = 0; i < VFIO_MAX_GROUPS; i++) {
- vfio_cfg.vfio_groups[i].fd = -1;
- vfio_cfg.vfio_groups[i].group_no = -1;
- vfio_cfg.vfio_groups[i].devices = 0;
+ rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfgs[i].vfio_container_fd = -1;
+ vfio_cfgs[i].vfio_active_groups = 0;
+ vfio_cfgs[i].vfio_iommu_type = NULL;
+ vfio_cfgs[i].mem_maps.lock = lock;
+
+ for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+ vfio_cfgs[i].vfio_groups[j].fd = -1;
+ vfio_cfgs[i].vfio_groups[j].group_num = -1;
+ vfio_cfgs[i].vfio_groups[j].devices = 0;
+ }
}
/* inform the user that we are probing for VFIO */
@@ -480,12 +896,12 @@ rte_vfio_enable(const char *modname)
return 0;
}
- vfio_cfg.vfio_container_fd = vfio_get_container_fd();
+ default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd();
/* check if we have VFIO driver enabled */
- if (vfio_cfg.vfio_container_fd != -1) {
+ if (default_vfio_cfg->vfio_container_fd != -1) {
RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
- vfio_cfg.vfio_enabled = 1;
+ default_vfio_cfg->vfio_enabled = 1;
} else {
RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
}
@@ -497,7 +913,7 @@ int
rte_vfio_is_enabled(const char *modname)
{
const int mod_available = rte_eal_check_module(modname) > 0;
- return vfio_cfg.vfio_enabled && mod_available;
+ return default_vfio_cfg->vfio_enabled && mod_available;
}
const struct vfio_iommu_type *
@@ -558,9 +974,14 @@ vfio_has_supported_extensions(int vfio_container_fd)
}
int
-vfio_get_container_fd(void)
+rte_vfio_get_container_fd(void)
{
int ret, vfio_container_fd;
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
/* if we're in a primary process, try to open the container */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
@@ -591,39 +1012,35 @@ vfio_get_container_fd(void)
}
return vfio_container_fd;
- } else {
- /*
- * if we're in a secondary process, request container fd from the
- * primary process via our socket
- */
- int socket_fd;
-
- socket_fd = vfio_mp_sync_connect_to_primary();
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
- if (vfio_container_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
+ }
+ /*
+ * if we're in a secondary process, request container fd from the
+ * primary process via mp channel
+ */
+ p->req = SOCKET_REQ_CONTAINER;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ vfio_container_fd = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+ free(mp_reply.msgs);
+ return mp_rep->fds[0];
}
- close(socket_fd);
- return vfio_container_fd;
+ free(mp_reply.msgs);
}
+ RTE_LOG(ERR, EAL, " cannot request container fd\n");
return -1;
}
int
-vfio_get_group_no(const char *sysfs_base,
- const char *dev_addr, int *iommu_group_no)
+rte_vfio_get_group_num(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_num)
{
char linkname[PATH_MAX];
char filename[PATH_MAX];
@@ -655,7 +1072,7 @@ vfio_get_group_no(const char *sysfs_base,
errno = 0;
group_tok = tok[ret - 1];
end = group_tok;
- *iommu_group_no = strtol(group_tok, &end, 10);
+ *iommu_group_num = strtol(group_tok, &end, 10);
if ((end != group_tok && *end != '\0') || errno != 0) {
RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr);
return -1;
@@ -665,34 +1082,49 @@ vfio_get_group_no(const char *sysfs_base,
}
static int
-vfio_type1_dma_map(int vfio_container_fd)
+type1_map(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
+ int *vfio_container_fd = arg;
- /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- struct vfio_iommu_type1_dma_map dma_map;
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
- if (ms[i].addr == NULL)
- break;
+static int
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+ if (do_map != 0) {
memset(&dma_map, 0, sizeof(dma_map));
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms[i].addr_64;
- dma_map.size = ms[i].len;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
- dma_map.iova = dma_map.vaddr;
- else
- dma_map.iova = ms[i].iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno,
- strerror(errno));
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
return -1;
}
}
@@ -701,24 +1133,107 @@ vfio_type1_dma_map(int vfio_container_fd)
}
static int
-vfio_spapr_dma_map(int vfio_container_fd)
+vfio_type1_dma_map(int vfio_container_fd)
{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
+ return rte_memseg_walk(type1_map, &vfio_container_fd);
+}
+
+static int
+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+
+ if (do_map != 0) {
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ } else {
+ struct vfio_iommu_spapr_register_memory reg = {
+ .argsz = sizeof(reg),
+ .flags = 0
+ };
+ reg.vaddr = (uintptr_t) vaddr;
+ reg.size = len;
+
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
- struct vfio_iommu_spapr_register_memory reg = {
- .argsz = sizeof(reg),
- .flags = 0
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
+struct spapr_walk_param {
+ uint64_t window_size;
+ uint64_t hugepage_sz;
+};
+static int
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct spapr_walk_param *param = arg;
+ uint64_t max = ms->iova + ms->len;
+
+ if (max > param->window_size) {
+ param->hugepage_sz = ms->hugepage_sz;
+ param->window_size = max;
+ }
+
+ return 0;
+}
+
+static int
+vfio_spapr_create_new_dma_window(int vfio_container_fd,
+ struct vfio_iommu_spapr_tce_create *create) {
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
};
struct vfio_iommu_spapr_tce_info info = {
.argsz = sizeof(info),
};
- struct vfio_iommu_spapr_tce_create create = {
- .argsz = sizeof(create),
- };
- struct vfio_iommu_spapr_tce_remove remove = {
- .argsz = sizeof(remove),
- };
+ int ret;
/* query spapr iommu info */
ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
@@ -737,70 +1252,159 @@ vfio_spapr_dma_map(int vfio_container_fd)
return -1;
}
- /* create DMA window from 0 to max(phys_addr + len) */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if (ms[i].addr == NULL)
- break;
-
- create.window_size = RTE_MAX(create.window_size,
- ms[i].iova + ms[i].len);
- }
-
- /* sPAPR requires window size to be a power of 2 */
- create.window_size = rte_align64pow2(create.window_size);
- create.page_shift = __builtin_ctzll(ms->hugepage_sz);
- create.levels = 1;
-
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
+ /* create new DMA window */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
if (ret) {
RTE_LOG(ERR, EAL, " cannot create new DMA window, "
"error %i (%s)\n", errno, strerror(errno));
return -1;
}
- if (create.start_addr != 0) {
+ if (create->start_addr != 0) {
RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
return -1;
}
- /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- struct vfio_iommu_type1_dma_map dma_map;
+ return 0;
+}
- if (ms[i].addr == NULL)
- break;
+static int
+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct spapr_walk_param param;
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct vfio_config *vfio_cfg;
+ struct user_mem_maps *user_mem_maps;
+ int i, ret = 0;
- reg.vaddr = (uintptr_t) ms[i].addr;
- reg.size = ms[i].len;
- ret = ioctl(vfio_container_fd,
- VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
+ vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid container fd!\n");
+ return -1;
+ }
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms[i].addr_64;
- dma_map.size = ms[i].len;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
- dma_map.iova = dma_map.vaddr;
- else
- dma_map.iova = ms[i].iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ /* check if window size needs to be adjusted */
+ memset(&param, 0, sizeof(param));
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+ /* we're inside a callback so use thread-unsafe version */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+ &param) < 0) {
+ RTE_LOG(ERR, EAL, "Could not get window size\n");
+ ret = -1;
+ goto out;
+ }
+
+ /* also check user maps */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ uint64_t max = user_mem_maps->maps[i].iova +
+ user_mem_maps->maps[i].len;
+ create.window_size = RTE_MAX(create.window_size, max);
+ }
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (do_map) {
+ void *addr;
+ /* re-create window and remap the entire memory */
+ if (iova > create.window_size) {
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd,
+ &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ ret = -1;
+ goto out;
+ }
+ /* we're inside a callback, so use thread-unsafe version
+ */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+ &vfio_container_fd) < 0) {
+ RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ /* remap all user maps */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map =
+ &user_mem_maps->maps[i];
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1)) {
+ RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ }
}
+ /* now that we've remapped all of the memory that was present
+ * before, map the segment that we were requested to map.
+ *
+ * however, if we were called by the callback, the memory we
+ * were called with was already in the memseg list, so previous
+ * mapping should've mapped that segment already.
+ *
+ * virt2memseg_list is a relatively cheap check, so use that. if
+ * memory is within any memseg list, it's a memseg, so it's
+ * already mapped.
+ */
+ addr = (void *)(uintptr_t)vaddr;
+ if (rte_mem_virt2memseg_list(addr) == NULL &&
+ vfio_spapr_dma_do_map(vfio_container_fd,
+ vaddr, iova, len, 1) < 0) {
+ RTE_LOG(ERR, EAL, "Could not map segment\n");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /* for unmap, check if iova within DMA window */
+ if (iova > create.window_size) {
+ RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap");
+ ret = -1;
+ goto out;
+ }
+
+ vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
+ }
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
+
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct spapr_walk_param param;
+
+ memset(&param, 0, sizeof(param));
+
+ /* create DMA window from 0 to max(phys_addr + len) */
+ rte_memseg_walk(vfio_spapr_window_size_walk, &param);
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ return -1;
}
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
+ return -1;
+
return 0;
}
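
sPAPR requires the DMA window to be a power of two, described by a page shift derived from the hugepage size (see the rte_align64pow2() and __builtin_ctzll() calls above). A small, self-contained sketch of that arithmetic, with a local stand-in for rte_align64pow2() and illustrative input values:

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    /* round v up to the next power of two (same idea as rte_align64pow2) */
    static uint64_t
    align64pow2(uint64_t v)
    {
        v--;
        v |= v >> 1;  v |= v >> 2;  v |= v >> 4;
        v |= v >> 8;  v |= v >> 16; v |= v >> 32;
        return v + 1;
    }

    int
    main(void)
    {
        uint64_t max_iova = (6ULL << 30) + (512ULL << 20); /* highest iova + len seen */
        uint64_t hugepage_sz = 2ULL << 20;                 /* 2 MB hugepages */

        /* window covers [0, max_iova), rounded up to a power of two */
        uint64_t window_size = align64pow2(max_iova);
        /* page_shift = log2(hugepage size) */
        unsigned int page_shift = __builtin_ctzll(hugepage_sz);

        printf("window_size=0x%" PRIx64 " page_shift=%u\n", window_size, page_shift);
        return 0;
    }
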
@@ -811,6 +1415,175 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+ uint64_t __rte_unused vaddr,
+ uint64_t __rte_unused iova, uint64_t __rte_unused len,
+ int __rte_unused do_map)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+static int
+vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type;
+
+ if (!t) {
+ RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (!t->dma_user_map_func) {
+ RTE_LOG(ERR, EAL,
+ " VFIO custom DMA region maping not supported by IOMMU %s\n",
+ t->name);
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova,
+ len, do_map);
+}
+
+static int
+container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct user_mem_map *new_map;
+ struct user_mem_maps *user_mem_maps;
+ int ret = 0;
+
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+ if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ /* map the entry */
+ if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
+ /* technically, this will fail if there are currently no devices
+ * plugged in, even if a device were added later, this mapping
+ * might have succeeded. however, since we cannot verify if this
+ * is a valid mapping without having a device attached, consider
+ * this to be unsupported, because we can't just store any old
+ * mapping and pollute list of active mappings willy-nilly.
+ */
+ RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
+ ret = -1;
+ goto out;
+ }
+ /* create new user mem map entry */
+ new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
+ new_map->addr = vaddr;
+ new_map->iova = iova;
+ new_map->len = len;
+
+ compact_user_maps(user_mem_maps);
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
+
+static int
+container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct user_mem_map *map, *new_map = NULL;
+ struct user_mem_maps *user_mem_maps;
+ int ret = 0;
+
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+ /* find our mapping */
+ map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
+ if (!map) {
+ RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
+ rte_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+ if (map->addr != vaddr || map->iova != iova || map->len != len) {
+ /* we're partially unmapping a previously mapped region, so we
+ * need to split entry into two.
+ */
+ if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
+ }
+
+ /* unmap the entry */
+ if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
+ /* there may not be any devices plugged in, so unmapping will
+ * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
+ * stop us from removing the mapping, as the assumption is we
+ * won't be needing this memory any more and thus will want to
+ * prevent it from being remapped again on hotplug. so, only
+ * fail if we indeed failed to unmap (e.g. if the mapping was
+ * within our mapped range but had invalid alignment).
+ */
+ if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
+ ret = -1;
+ goto out;
+ } else {
+ RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
+ }
+ }
+ /* remove map from the list of active mappings */
+ if (new_map != NULL) {
+ adjust_map(map, new_map, vaddr, len);
+
+ /* if we've created a new map by splitting, sort everything */
+ if (!is_null_map(new_map)) {
+ compact_user_maps(user_mem_maps);
+ } else {
+ /* we've created a new mapping, but it was unused */
+ user_mem_maps->n_maps--;
+ }
+ } else {
+ memset(map, 0, sizeof(*map));
+ compact_user_maps(user_mem_maps);
+ user_mem_maps->n_maps--;
+ }
+
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
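
The actual split of a tracked mapping is done by adjust_map(), which is not part of this hunk. Purely as an illustration of the bookkeeping idea (removing a subrange from a tracked interval, assuming the unmapped subrange lies entirely within one entry), a standalone sketch with hypothetical names:

    #include <stdint.h>

    struct mem_map { uint64_t addr, iova, len; };

    /* remove [vaddr, vaddr + len) from *orig; surviving pieces are written to
     * out[] and their count returned: 0 = fully removed, 1 = shrunk, 2 = split.
     */
    static int
    split_map(const struct mem_map *orig, struct mem_map out[2],
            uint64_t vaddr, uint64_t len)
    {
        uint64_t map_end = orig->addr + orig->len;
        uint64_t unmap_end = vaddr + len;
        int n = 0;

        if (vaddr > orig->addr) {
            /* piece to the left of the unmapped range survives */
            out[n].addr = orig->addr;
            out[n].iova = orig->iova;
            out[n].len = vaddr - orig->addr;
            n++;
        }
        if (unmap_end < map_end) {
            /* piece to the right of the unmapped range survives */
            out[n].addr = unmap_end;
            out[n].iova = orig->iova + (unmap_end - orig->addr);
            out[n].len = map_end - unmap_end;
            n++;
        }
        return n;
    }
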
+
+int
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return container_dma_map(default_vfio_cfg, vaddr, iova, len);
+}
+
+int
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
+}
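
rte_vfio_dma_map()/rte_vfio_dma_unmap() let an application register memory that EAL does not manage (for example an externally allocated buffer) with the default VFIO container. A hedged usage sketch — it assumes IOVA-as-VA mode, so the IOVA simply mirrors the virtual address, and a page-aligned buffer size; the helper names are illustrative:

    #include <stdint.h>
    #include <rte_vfio.h>
    #include <rte_errno.h>

    #define EXT_BUF_LEN (2 * 1024 * 1024) /* must be page-aligned for VFIO */

    static int
    register_ext_buf(void *buf)
    {
        uint64_t vaddr = (uint64_t)(uintptr_t)buf;

        /* in IOVA-as-VA mode the IOVA can simply mirror the vaddr */
        if (rte_vfio_dma_map(vaddr, vaddr, EXT_BUF_LEN) < 0) {
            /* rte_errno may be ENODEV/ENOTSUP if VFIO is not set up yet */
            return -rte_errno;
        }
        return 0;
    }

    static void
    unregister_ext_buf(void *buf)
    {
        uint64_t vaddr = (uint64_t)(uintptr_t)buf;

        rte_vfio_dma_unmap(vaddr, vaddr, EXT_BUF_LEN);
    }
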
+
int
rte_vfio_noiommu_is_enabled(void)
{
@@ -843,4 +1616,299 @@ rte_vfio_noiommu_is_enabled(void)
return c == 'Y';
}
-#endif
+int
+rte_vfio_container_create(void)
+{
+ int i;
+
+ /* Find an empty slot to store new vfio config */
+ for (i = 1; i < VFIO_MAX_CONTAINERS; i++) {
+ if (vfio_cfgs[i].vfio_container_fd == -1)
+ break;
+ }
+
+ if (i == VFIO_MAX_CONTAINERS) {
+ RTE_LOG(ERR, EAL, "exceed max vfio container limit\n");
+ return -1;
+ }
+
+ vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd();
+ if (vfio_cfgs[i].vfio_container_fd < 0) {
+ RTE_LOG(NOTICE, EAL, "fail to create a new container\n");
+ return -1;
+ }
+
+ return vfio_cfgs[i].vfio_container_fd;
+}
+
+int __rte_experimental
+rte_vfio_container_destroy(int container_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num != -1)
+ rte_vfio_container_group_unbind(container_fd,
+ vfio_cfg->vfio_groups[i].group_num);
+
+ close(container_fd);
+ vfio_cfg->vfio_container_fd = -1;
+ vfio_cfg->vfio_active_groups = 0;
+ vfio_cfg->vfio_iommu_type = NULL;
+
+ return 0;
+}
+
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+ struct vfio_group *cur_grp;
+ int vfio_group_fd;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ /* Check room for new group */
+ if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ return -1;
+ }
+
+ /* Get an index for the new group */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num == -1) {
+ cur_grp = &vfio_cfg->vfio_groups[i];
+ break;
+ }
+
+ /* This should not happen */
+ if (i == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ return -1;
+ }
+
+ vfio_group_fd = vfio_open_group_fd(iommu_group_num);
+ if (vfio_group_fd < 0) {
+ RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
+ return -1;
+ }
+ cur_grp->group_num = iommu_group_num;
+ cur_grp->fd = vfio_group_fd;
+ cur_grp->devices = 0;
+ vfio_cfg->vfio_active_groups++;
+
+ return vfio_group_fd;
+}
+
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+ struct vfio_group *cur_grp = NULL;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++) {
+ if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) {
+ cur_grp = &vfio_cfg->vfio_groups[i];
+ break;
+ }
+ }
+
+ /* This should not happen */
+ if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
+ RTE_LOG(ERR, EAL, "Specified group number not found\n");
+ return -1;
+ }
+
+ if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {
+ RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
+ " iommu_group_num %d\n", iommu_group_num);
+ return -1;
+ }
+ cur_grp->group_num = -1;
+ cur_grp->fd = -1;
+ cur_grp->devices = 0;
+ vfio_cfg->vfio_active_groups--;
+
+ return 0;
+}
+
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct vfio_config *vfio_cfg;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ return container_dma_map(vfio_cfg, vaddr, iova, len);
+}
+
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct vfio_config *vfio_cfg;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ return container_dma_unmap(vfio_cfg, vaddr, iova, len);
+}
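
The container API (experimental in this release) lets a process manage additional VFIO containers beyond the default one. A hedged sketch of the intended call sequence; the group number and addresses are placeholders, and note that per the vfio_dma_mem_map() check above, DMA mapping can only succeed once a device has been set up in the container so the IOMMU type is known:

    #include <stdint.h>
    #include <rte_vfio.h>

    static int
    setup_extra_container(int iommu_group_num, uint64_t vaddr, uint64_t iova,
            uint64_t len)
    {
        int container_fd, group_fd;

        container_fd = rte_vfio_container_create();
        if (container_fd < 0)
            return -1;

        /* bind the device's IOMMU group to the new container */
        group_fd = rte_vfio_container_group_bind(container_fd, iommu_group_num);
        if (group_fd < 0)
            goto err;

        /* a device must already be attached (rte_vfio_setup_device()) for the
         * container's IOMMU type to be initialized before mapping.
         */
        if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0)
            goto err;

        return container_fd;
    err:
        rte_vfio_container_destroy(container_fd);
        return -1;
    }
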
+
+#else
+
+int
+rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *vfio_dev_fd,
+ __rte_unused struct vfio_device_info *device_info)
+{
+ return -1;
+}
+
+int
+rte_vfio_release_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr, __rte_unused int fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_enable(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int
+rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_create(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+#endif /* VFIO_PRESENT */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 80595773..68d4750a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -19,6 +19,7 @@
#ifdef VFIO_PRESENT
+#include <stdint.h>
#include <linux/vfio.h>
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
@@ -26,6 +27,7 @@
#ifndef VFIO_SPAPR_TCE_v2_IOMMU
#define RTE_VFIO_SPAPR 7
#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18)
#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
@@ -79,49 +81,37 @@ struct vfio_iommu_spapr_tce_info {
#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
-#define RTE_VFIO_NOIOMMU 8
-#else
-#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
-#endif
-
#define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS
-
-/*
- * Function prototypes for VFIO multiprocess sync functions
- */
-int vfio_mp_sync_send_request(int socket, int req);
-int vfio_mp_sync_receive_request(int socket);
-int vfio_mp_sync_send_fd(int socket, int fd);
-int vfio_mp_sync_receive_fd(int socket);
-int vfio_mp_sync_connect_to_primary(void);
+#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS
/*
* we don't need to store device fd's anywhere since they can be obtained from
* the group fd via an ioctl() call.
*/
struct vfio_group {
- int group_no;
+ int group_num;
int fd;
int devices;
};
-struct vfio_config {
- int vfio_enabled;
- int vfio_container_fd;
- int vfio_active_groups;
- struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
-};
-
/* DMA mapping function prototype.
* Takes VFIO container fd as a parameter.
* Returns 0 on success, -1 on error.
* */
typedef int (*vfio_dma_func_t)(int);
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, physical address, length and
+ * operation type (0 to unmap, 1 to map) as parameters.
+ * Returns 0 on success, -1 on error.
+ */
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map);
+
struct vfio_iommu_type {
int type_id;
const char *name;
+ vfio_dma_user_func_t dma_user_map_func;
vfio_dma_func_t dma_map_func;
};
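
Each IOMMU type descriptor now carries both the bulk callback and the new per-region callback. As a hedged sketch only (the actual table lives in eal_vfio.c and is not part of this hunk), an entry for the sPAPR type could be described with the functions added in this patch:

    static const struct vfio_iommu_type spapr_iommu = {
        .type_id = RTE_VFIO_SPAPR,
        .name = "sPAPR",
        .dma_map_func = vfio_spapr_dma_map,          /* map all memsegs at init */
        .dma_user_map_func = vfio_spapr_dma_mem_map, /* map/unmap one user region */
    };
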
@@ -133,30 +123,22 @@ vfio_set_iommu_type(int vfio_container_fd);
int
vfio_has_supported_extensions(int vfio_container_fd);
-/* open container fd or get an existing one */
-int
-vfio_get_container_fd(void);
-
-/* parse IOMMU group number for a device
- * returns 1 on success, -1 for errors, 0 for non-existent group
- */
-int
-vfio_get_group_no(const char *sysfs_base,
- const char *dev_addr, int *iommu_group_no);
-
-/* open group fd or get an existing one */
-int
-vfio_get_group_fd(int iommu_group_no);
-
int vfio_mp_sync_setup(void);
+#define EAL_VFIO_MP "eal_vfio_mp_sync"
+
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
-#define SOCKET_CLR_GROUP 0x300
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
+struct vfio_mp_param {
+ int req;
+ int result;
+ int group_num;
+};
+
#endif /* VFIO_PRESENT */
#endif /* EAL_VFIO_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 7cc3c152..680a24aa 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -1,32 +1,16 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <unistd.h>
#include <string.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <pthread.h>
-
-/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
-#ifdef __USE_MISC
-#define REMOVED_USE_MISC
-#undef __USE_MISC
-#endif
-#include <sys/un.h>
-/* make sure we redefine __USE_MISC only if it was previously undefined */
-#ifdef REMOVED_USE_MISC
-#define __USE_MISC
-#undef REMOVED_USE_MISC
-#endif
+#include <rte_compat.h>
#include <rte_log.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
-#include "eal_filesystem.h"
#include "eal_vfio.h"
-#include "eal_thread.h"
/**
* @file
@@ -37,358 +21,70 @@
#ifdef VFIO_PRESENT
-#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
-#define CMSGLEN (CMSG_LEN(sizeof(int)))
-#define FD_TO_CMSGHDR(fd, chdr) \
- do {\
- (chdr).cmsg_len = CMSGLEN;\
- (chdr).cmsg_level = SOL_SOCKET;\
- (chdr).cmsg_type = SCM_RIGHTS;\
- memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
- } while (0)
-#define CMSGHDR_TO_FD(chdr, fd) \
- memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
-
-static pthread_t socket_thread;
-static int mp_socket_fd;
-
-
-/* get socket path (/var/run if root, $HOME otherwise) */
-static void
-get_socket_path(char *buffer, int bufsz)
-{
- const char *dir = "/var/run";
- const char *home_dir = getenv("HOME");
-
- if (getuid() != 0 && home_dir != NULL)
- dir = home_dir;
-
- /* use current prefix as file path */
- snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
- internal_config.hugefile_prefix);
-}
-
-
-
-/*
- * data flow for socket comm protocol:
- * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
- * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
- * 2. server receives message
- * 2a. in case of invalid group, SOCKET_ERR is sent back to client
- * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
- * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
- *
- * in case of any error, socket is closed.
- */
-
-/* send a request, return -1 on error */
-int
-vfio_mp_sync_send_request(int socket, int req)
-{
- struct msghdr hdr;
- struct iovec iov;
- int buf;
- int ret;
-
- memset(&hdr, 0, sizeof(hdr));
-
- buf = req;
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
-
- ret = sendmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
- return 0;
-}
-
-/* receive a request and return it */
-int
-vfio_mp_sync_receive_request(int socket)
-{
- int buf;
- struct msghdr hdr;
- struct iovec iov;
- int ret, req;
-
- memset(&hdr, 0, sizeof(hdr));
-
- buf = SOCKET_ERR;
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
-
- ret = recvmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
-
- req = buf;
-
- return req;
-}
-
-/* send OK in message, fd in control message */
-int
-vfio_mp_sync_send_fd(int socket, int fd)
+static int
+vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
{
- int buf;
- struct msghdr hdr;
- struct cmsghdr *chdr;
- char chdr_buf[CMSGLEN];
- struct iovec iov;
+ int fd = -1;
int ret;
+ struct rte_mp_msg reply;
+ struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param;
+ const struct vfio_mp_param *m =
+ (const struct vfio_mp_param *)msg->param;
- chdr = (struct cmsghdr *) chdr_buf;
- memset(chdr, 0, sizeof(chdr_buf));
- memset(&hdr, 0, sizeof(hdr));
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
- hdr.msg_control = chdr;
- hdr.msg_controllen = CMSGLEN;
-
- buf = SOCKET_OK;
- FD_TO_CMSGHDR(fd, *chdr);
-
- ret = sendmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
- return 0;
-}
-
-/* receive OK in message, fd in control message */
-int
-vfio_mp_sync_receive_fd(int socket)
-{
- int buf;
- struct msghdr hdr;
- struct cmsghdr *chdr;
- char chdr_buf[CMSGLEN];
- struct iovec iov;
- int ret, req, fd;
-
- buf = SOCKET_ERR;
-
- chdr = (struct cmsghdr *) chdr_buf;
- memset(chdr, 0, sizeof(chdr_buf));
- memset(&hdr, 0, sizeof(hdr));
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
- hdr.msg_control = chdr;
- hdr.msg_controllen = CMSGLEN;
-
- ret = recvmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
-
- req = buf;
-
- if (req != SOCKET_OK)
- return -1;
-
- CMSGHDR_TO_FD(*chdr, fd);
-
- return fd;
-}
-
-/* connect socket_fd in secondary process to the primary process's socket */
-int
-vfio_mp_sync_connect_to_primary(void)
-{
- struct sockaddr_un addr;
- socklen_t sockaddr_len;
- int socket_fd;
-
- /* set up a socket */
- socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+ if (msg->len_param != sizeof(*m)) {
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;
}
- get_socket_path(addr.sun_path, sizeof(addr.sun_path));
- addr.sun_family = AF_UNIX;
-
- sockaddr_len = sizeof(struct sockaddr_un);
-
- if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
- return socket_fd;
-
- /* if connect failed */
- close(socket_fd);
- return -1;
-}
-
+ memset(&reply, 0, sizeof(reply));
-
-/*
- * socket listening thread for primary process
- */
-static __attribute__((noreturn)) void *
-vfio_mp_sync_thread(void __rte_unused * arg)
-{
- int ret, fd, vfio_data;
-
- /* wait for requests on the socket */
- for (;;) {
- int conn_sock;
- struct sockaddr_un addr;
- socklen_t sockaddr_len = sizeof(addr);
-
- /* this is a blocking call */
- conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
- &sockaddr_len);
-
- /* just restart on error */
- if (conn_sock == -1)
- continue;
-
- /* set socket to linger after close */
- struct linger l;
- l.l_onoff = 1;
- l.l_linger = 60;
-
- if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
- RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
- "on listen socket (%s)\n", strerror(errno));
-
- ret = vfio_mp_sync_receive_request(conn_sock);
-
- switch (ret) {
- case SOCKET_REQ_CONTAINER:
- fd = vfio_get_container_fd();
- if (fd < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
- else
- vfio_mp_sync_send_fd(conn_sock, fd);
- if (fd >= 0)
- close(fd);
- break;
- case SOCKET_REQ_GROUP:
- /* wait for group number */
- vfio_data = vfio_mp_sync_receive_request(conn_sock);
- if (vfio_data < 0) {
- close(conn_sock);
- continue;
- }
-
- fd = vfio_get_group_fd(vfio_data);
-
- if (fd < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+ switch (m->req) {
+ case SOCKET_REQ_GROUP:
+ r->req = SOCKET_REQ_GROUP;
+ r->group_num = m->group_num;
+ fd = rte_vfio_get_group_fd(m->group_num);
+ if (fd < 0)
+ r->result = SOCKET_ERR;
+ else if (fd == 0)
/* if VFIO group exists but isn't bound to VFIO driver */
- else if (fd == 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+ r->result = SOCKET_NO_FD;
+ else {
/* if group exists and is bound to VFIO driver */
- else {
- vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
- vfio_mp_sync_send_fd(conn_sock, fd);
- }
- break;
- case SOCKET_CLR_GROUP:
- /* wait for group fd */
- vfio_data = vfio_mp_sync_receive_request(conn_sock);
- if (vfio_data < 0) {
- close(conn_sock);
- continue;
- }
-
- ret = rte_vfio_clear_group(vfio_data);
-
- if (ret < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
- else
- vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
- break;
- default:
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
- break;
+ r->result = SOCKET_OK;
+ reply.num_fds = 1;
+ reply.fds[0] = fd;
}
- close(conn_sock);
- }
-}
-
-static int
-vfio_mp_sync_socket_setup(void)
-{
- int ret, socket_fd;
- struct sockaddr_un addr;
- socklen_t sockaddr_len;
-
- /* set up a socket */
- socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, "Failed to create socket!\n");
- return -1;
- }
-
- get_socket_path(addr.sun_path, sizeof(addr.sun_path));
- addr.sun_family = AF_UNIX;
-
- sockaddr_len = sizeof(struct sockaddr_un);
-
- unlink(addr.sun_path);
-
- ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
- if (ret) {
- RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
- close(socket_fd);
- return -1;
- }
-
- ret = listen(socket_fd, 50);
- if (ret) {
- RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
- close(socket_fd);
+ break;
+ case SOCKET_REQ_CONTAINER:
+ r->req = SOCKET_REQ_CONTAINER;
+ fd = rte_vfio_get_container_fd();
+ if (fd < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->result = SOCKET_OK;
+ reply.num_fds = 1;
+ reply.fds[0] = fd;
+ }
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;
}
- /* save the socket in local configuration */
- mp_socket_fd = socket_fd;
+ strcpy(reply.name, EAL_VFIO_MP);
+ reply.len_param = sizeof(*r);
- return 0;
+ ret = rte_mp_reply(&reply, peer);
+ if (m->req == SOCKET_REQ_CONTAINER && fd >= 0)
+ close(fd);
+ return ret;
}
-/*
- * set up a local socket and tell it to listen for incoming connections
- */
int
vfio_mp_sync_setup(void)
{
- int ret;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
-
- if (vfio_mp_sync_socket_setup() < 0) {
- RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
- return -1;
- }
-
- ret = pthread_create(&socket_thread, NULL,
- vfio_mp_sync_thread, NULL);
- if (ret) {
- RTE_LOG(ERR, EAL,
- "Failed to create thread for communication with secondary processes!\n");
- close(mp_socket_fd);
- return -1;
- }
-
- /* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
- ret = rte_thread_setname(socket_thread, thread_name);
- if (ret)
- RTE_LOG(DEBUG, EAL,
- "Failed to set thread name for secondary processes!\n");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return rte_mp_action_register(EAL_VFIO_MP, vfio_mp_primary);
return 0;
}
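
With the hand-rolled UNIX socket code replaced by the generic rte_mp channel, a secondary process obtains container/group fds by sending a vfio_mp_param request to the EAL_VFIO_MP action registered above. A hedged sketch of the requesting side — the real secondary-path helpers live elsewhere in eal_vfio.c, and the timeout and error handling here are illustrative:

    #include <string.h>
    #include <stdlib.h>
    #include <rte_eal.h>
    #include "eal_vfio.h"

    static int
    request_container_fd(void)
    {
        struct rte_mp_msg req;
        struct rte_mp_reply reply;
        struct vfio_mp_param *p = (struct vfio_mp_param *)req.param;
        struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
        int fd = -1;

        memset(&req, 0, sizeof(req));
        strcpy(req.name, EAL_VFIO_MP);
        req.len_param = sizeof(*p);
        p->req = SOCKET_REQ_CONTAINER;

        if (rte_mp_request_sync(&req, &reply, &ts) == 0 &&
                reply.nb_received == 1) {
            struct rte_mp_msg *resp = &reply.msgs[0];
            const struct vfio_mp_param *r =
                (const struct vfio_mp_param *)resp->param;

            if (r->result == SOCKET_OK && resp->num_fds == 1)
                fd = resp->fds[0];
            free(reply.msgs); /* reply messages are allocated by EAL */
        }
        return fd;
    }
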
diff --git a/lib/librte_eal/linuxapp/eal/meson.build b/lib/librte_eal/linuxapp/eal/meson.build
index 03974ff2..6e31c2aa 100644
--- a/lib/librte_eal/linuxapp/eal/meson.build
+++ b/lib/librte_eal/linuxapp/eal/meson.build
@@ -7,9 +7,11 @@ install_subdir('include/exec-env', install_dir: get_option('includedir'))
env_objs = []
env_headers = []
env_sources = files('eal_alarm.c',
+ 'eal_cpuflags.c',
'eal_debug.c',
'eal_hugepage_info.c',
'eal_interrupts.c',
+ 'eal_memalloc.c',
'eal_lcore.c',
'eal_log.c',
'eal_thread.c',
@@ -18,8 +20,10 @@ env_sources = files('eal_alarm.c',
'eal_vfio_mp_sync.c',
'eal.c',
'eal_memory.c',
+ 'eal_dev.c',
)
+deps += ['kvargs']
if has_libnuma == 1
dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
endif
diff --git a/lib/librte_eal/linuxapp/igb_uio/Kbuild b/lib/librte_eal/linuxapp/igb_uio/Kbuild
deleted file mode 100644
index 98c98fe5..00000000
--- a/lib/librte_eal/linuxapp/igb_uio/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-obj-m := igb_uio.o
diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile
deleted file mode 100644
index f83bcc7c..00000000
--- a/lib/librte_eal/linuxapp/igb_uio/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = igb_uio
-MODULE_PATH = drivers/net/igb_uio
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -Winline -Wall -Werror
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := igb_uio.c
-
-include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
deleted file mode 100644
index ce456d4b..00000000
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Minimal wrappers to allow compiling igb_uio on older kernels.
- */
-
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
-#define pci_cfg_access_lock pci_block_user_cfg_access
-#define pci_cfg_access_unlock pci_unblock_user_cfg_access
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
-#define HAVE_PTE_MASK_PAGE_IOMAP
-#endif
-
-#ifndef PCI_MSIX_ENTRY_SIZE
-#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
-#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
-#endif
-
-/*
- * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition
- * to pci_regs.h Those kernels has PCI_MSIX_ENTRY_SIZE defined but not
- * PCI_MSIX_ENTRY_CTRL_MASKBIT
- */
-#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT
-#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
-
-static int pci_num_vf(struct pci_dev *dev)
-{
- struct iov {
- int pos;
- int nres;
- u32 cap;
- u16 ctrl;
- u16 total;
- u16 initial;
- u16 nr_virtfn;
- } *iov = (struct iov *)dev->sriov;
-
- if (!dev->is_physfn)
- return 0;
-
- return iov->nr_virtfn;
-}
-
-#endif /* < 2.6.34 */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
-
-#define kstrtoul strict_strtoul
-
-#endif /* < 2.6.39 */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3)))
-
-/* Check if INTX works to control irq's.
- * Set's INTX_DISABLE flag and reads it back
- */
-static bool pci_intx_mask_supported(struct pci_dev *pdev)
-{
- bool mask_supported = false;
- uint16_t orig, new;
-
- pci_block_user_cfg_access(pdev);
- pci_read_config_word(pdev, PCI_COMMAND, &orig);
- pci_write_config_word(pdev, PCI_COMMAND,
- orig ^ PCI_COMMAND_INTX_DISABLE);
- pci_read_config_word(pdev, PCI_COMMAND, &new);
-
- if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
- dev_err(&pdev->dev, "Command register changed from "
- "0x%x to 0x%x: driver or hardware bug?\n", orig, new);
- } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
- mask_supported = true;
- pci_write_config_word(pdev, PCI_COMMAND, orig);
- }
- pci_unblock_user_cfg_access(pdev);
-
- return mask_supported;
-}
-
-static bool pci_check_and_mask_intx(struct pci_dev *pdev)
-{
- bool pending;
- uint32_t status;
-
- pci_block_user_cfg_access(pdev);
- pci_read_config_dword(pdev, PCI_COMMAND, &status);
-
- /* interrupt is not ours, goes to out */
- pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0);
- if (pending) {
- uint16_t old, new;
-
- old = status;
- if (status != 0)
- new = old & (~PCI_COMMAND_INTX_DISABLE);
- else
- new = old | PCI_COMMAND_INTX_DISABLE;
-
- if (old != new)
- pci_write_config_word(pdev, PCI_COMMAND, new);
- }
- pci_unblock_user_cfg_access(pdev);
-
- return pending;
-}
-
-#endif /* < 3.3.0 */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
-#define HAVE_ALLOC_IRQ_VECTORS 1
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
-#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
-#define HAVE_PCI_MSI_MASK_IRQ 1
-#endif
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
deleted file mode 100644
index 4cae4dd2..00000000
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ /dev/null
@@ -1,643 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*-
- * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/uio_driver.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/msi.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-
-#include <rte_pci_dev_features.h>
-
-#include "compat.h"
-
-/**
- * A structure describing the private information for a uio device.
- */
-struct rte_uio_pci_dev {
- struct uio_info info;
- struct pci_dev *pdev;
- enum rte_intr_mode mode;
- struct mutex lock;
- int refcnt;
-};
-
-static char *intr_mode;
-static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
-/* sriov sysfs */
-static ssize_t
-show_max_vfs(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
-}
-
-static ssize_t
-store_max_vfs(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int err = 0;
- unsigned long max_vfs;
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (0 != kstrtoul(buf, 0, &max_vfs))
- return -EINVAL;
-
- if (0 == max_vfs)
- pci_disable_sriov(pdev);
- else if (0 == pci_num_vf(pdev))
- err = pci_enable_sriov(pdev, max_vfs);
- else /* do nothing if change max_vfs number */
- err = -EINVAL;
-
- return err ? err : count;
-}
-
-static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
-
-static struct attribute *dev_attrs[] = {
- &dev_attr_max_vfs.attr,
- NULL,
-};
-
-static const struct attribute_group dev_attr_grp = {
- .attrs = dev_attrs,
-};
-
-#ifndef HAVE_PCI_MSI_MASK_IRQ
-/*
- * It masks the msix on/off of generating MSI-X messages.
- */
-static void
-igbuio_msix_mask_irq(struct msi_desc *desc, s32 state)
-{
- u32 mask_bits = desc->masked;
- unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL;
-
- if (state != 0)
- mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
- else
- mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
-
- if (mask_bits != desc->masked) {
- writel(mask_bits, desc->mask_base + offset);
- readl(desc->mask_base);
- desc->masked = mask_bits;
- }
-}
-
-/*
- * It masks the msi on/off of generating MSI messages.
- */
-static void
-igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state)
-{
- u32 mask_bits = desc->masked;
- u32 offset = desc->irq - pdev->irq;
- u32 mask = 1 << offset;
-
- if (!desc->msi_attrib.maskbit)
- return;
-
- if (state != 0)
- mask_bits &= ~mask;
- else
- mask_bits |= mask;
-
- if (mask_bits != desc->masked) {
- pci_write_config_dword(pdev, desc->mask_pos, mask_bits);
- desc->masked = mask_bits;
- }
-}
-
-static void
-igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state)
-{
- struct msi_desc *desc;
- struct list_head *msi_list;
-
-#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE
- msi_list = &pdev->dev.msi_list;
-#else
- msi_list = &pdev->msi_list;
-#endif
-
- if (mode == RTE_INTR_MODE_MSIX) {
- list_for_each_entry(desc, msi_list, list)
- igbuio_msix_mask_irq(desc, irq_state);
- } else if (mode == RTE_INTR_MODE_MSI) {
- list_for_each_entry(desc, msi_list, list)
- igbuio_msi_mask_irq(pdev, desc, irq_state);
- }
-}
-#endif
-
-/**
- * This is the irqcontrol callback to be registered to uio_info.
- * It can be used to disable/enable interrupt from user space processes.
- *
- * @param info
- * pointer to uio_info.
- * @param irq_state
- * state value. 1 to enable interrupt, 0 to disable interrupt.
- *
- * @return
- * - On success, 0.
- * - On failure, a negative value.
- */
-static int
-igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
-{
- struct rte_uio_pci_dev *udev = info->priv;
- struct pci_dev *pdev = udev->pdev;
-
-#ifdef HAVE_PCI_MSI_MASK_IRQ
- struct irq_data *irq = irq_get_irq_data(udev->info.irq);
-#endif
-
- pci_cfg_access_lock(pdev);
-
- if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) {
-#ifdef HAVE_PCI_MSI_MASK_IRQ
- if (irq_state == 1)
- pci_msi_unmask_irq(irq);
- else
- pci_msi_mask_irq(irq);
-#else
- igbuio_mask_irq(pdev, udev->mode, irq_state);
-#endif
- }
-
- if (udev->mode == RTE_INTR_MODE_LEGACY)
- pci_intx(pdev, !!irq_state);
-
- pci_cfg_access_unlock(pdev);
-
- return 0;
-}
-
-/**
- * This is interrupt handler which will check if the interrupt is for the right device.
- * If yes, disable it here and will be enable later.
- */
-static irqreturn_t
-igbuio_pci_irqhandler(int irq, void *dev_id)
-{
- struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id;
- struct uio_info *info = &udev->info;
-
- /* Legacy mode need to mask in hardware */
- if (udev->mode == RTE_INTR_MODE_LEGACY &&
- !pci_check_and_mask_intx(udev->pdev))
- return IRQ_NONE;
-
- uio_event_notify(info);
-
- /* Message signal mode, no share IRQ and automasked */
- return IRQ_HANDLED;
-}
-
-static int
-igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
-{
- int err = 0;
-#ifndef HAVE_ALLOC_IRQ_VECTORS
- struct msix_entry msix_entry;
-#endif
-
- switch (igbuio_intr_mode_preferred) {
- case RTE_INTR_MODE_MSIX:
- /* Only 1 msi-x vector needed */
-#ifndef HAVE_ALLOC_IRQ_VECTORS
- msix_entry.entry = 0;
- if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
- dev_dbg(&udev->pdev->dev, "using MSI-X");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = msix_entry.vector;
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#else
- if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
- dev_dbg(&udev->pdev->dev, "using MSI-X");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = pci_irq_vector(udev->pdev, 0);
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#endif
-
- /* fall back to MSI */
- case RTE_INTR_MODE_MSI:
-#ifndef HAVE_ALLOC_IRQ_VECTORS
- if (pci_enable_msi(udev->pdev) == 0) {
- dev_dbg(&udev->pdev->dev, "using MSI");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = udev->pdev->irq;
- udev->mode = RTE_INTR_MODE_MSI;
- break;
- }
-#else
- if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
- dev_dbg(&udev->pdev->dev, "using MSI");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = pci_irq_vector(udev->pdev, 0);
- udev->mode = RTE_INTR_MODE_MSI;
- break;
- }
-#endif
- /* fall back to INTX */
- case RTE_INTR_MODE_LEGACY:
- if (pci_intx_mask_supported(udev->pdev)) {
- dev_dbg(&udev->pdev->dev, "using INTX");
- udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
- udev->info.irq = udev->pdev->irq;
- udev->mode = RTE_INTR_MODE_LEGACY;
- break;
- }
- dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
- /* fall back to no IRQ */
- case RTE_INTR_MODE_NONE:
- udev->mode = RTE_INTR_MODE_NONE;
- udev->info.irq = UIO_IRQ_NONE;
- break;
-
- default:
- dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
- igbuio_intr_mode_preferred);
- udev->info.irq = UIO_IRQ_NONE;
- err = -EINVAL;
- }
-
- if (udev->info.irq != UIO_IRQ_NONE)
- err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
- udev->info.irq_flags, udev->info.name,
- udev);
- dev_info(&udev->pdev->dev, "uio device registered with irq %ld\n",
- udev->info.irq);
-
- return err;
-}
-
-static void
-igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
-{
- if (udev->info.irq) {
- free_irq(udev->info.irq, udev);
- udev->info.irq = 0;
- }
-
-#ifndef HAVE_ALLOC_IRQ_VECTORS
- if (udev->mode == RTE_INTR_MODE_MSIX)
- pci_disable_msix(udev->pdev);
- if (udev->mode == RTE_INTR_MODE_MSI)
- pci_disable_msi(udev->pdev);
-#else
- if (udev->mode == RTE_INTR_MODE_MSIX ||
- udev->mode == RTE_INTR_MODE_MSI)
- pci_free_irq_vectors(udev->pdev);
-#endif
-}
-
-
-/**
- * This gets called while opening uio device file.
- */
-static int
-igbuio_pci_open(struct uio_info *info, struct inode *inode)
-{
- struct rte_uio_pci_dev *udev = info->priv;
- struct pci_dev *dev = udev->pdev;
- int err;
-
- mutex_lock(&udev->lock);
- if (++udev->refcnt > 1) {
- mutex_unlock(&udev->lock);
- return 0;
- }
-
- /* set bus master, which was cleared by the reset function */
- pci_set_master(dev);
-
- /* enable interrupts */
- err = igbuio_pci_enable_interrupts(udev);
- mutex_unlock(&udev->lock);
- if (err) {
- dev_err(&dev->dev, "Enable interrupt fails\n");
- return err;
- }
- return 0;
-}
-
-static int
-igbuio_pci_release(struct uio_info *info, struct inode *inode)
-{
- struct rte_uio_pci_dev *udev = info->priv;
- struct pci_dev *dev = udev->pdev;
-
- mutex_lock(&udev->lock);
- if (--udev->refcnt > 0) {
- mutex_unlock(&udev->lock);
- return 0;
- }
-
- /* disable interrupts */
- igbuio_pci_disable_interrupts(udev);
-
- /* stop the device from further DMA */
- pci_clear_master(dev);
-
- mutex_unlock(&udev->lock);
- return 0;
-}
-
-/* Remap pci resources described by bar #pci_bar in uio resource n. */
-static int
-igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
- int n, int pci_bar, const char *name)
-{
- unsigned long addr, len;
- void *internal_addr;
-
- if (n >= ARRAY_SIZE(info->mem))
- return -EINVAL;
-
- addr = pci_resource_start(dev, pci_bar);
- len = pci_resource_len(dev, pci_bar);
- if (addr == 0 || len == 0)
- return -1;
- internal_addr = ioremap(addr, len);
- if (internal_addr == NULL)
- return -1;
- info->mem[n].name = name;
- info->mem[n].addr = addr;
- info->mem[n].internal_addr = internal_addr;
- info->mem[n].size = len;
- info->mem[n].memtype = UIO_MEM_PHYS;
- return 0;
-}
-
-/* Get pci port io resources described by bar #pci_bar in uio resource n. */
-static int
-igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
- int n, int pci_bar, const char *name)
-{
- unsigned long addr, len;
-
- if (n >= ARRAY_SIZE(info->port))
- return -EINVAL;
-
- addr = pci_resource_start(dev, pci_bar);
- len = pci_resource_len(dev, pci_bar);
- if (addr == 0 || len == 0)
- return -EINVAL;
-
- info->port[n].name = name;
- info->port[n].start = addr;
- info->port[n].size = len;
- info->port[n].porttype = UIO_PORT_X86;
-
- return 0;
-}
-
-/* Unmap previously ioremap'd resources */
-static void
-igbuio_pci_release_iomem(struct uio_info *info)
-{
- int i;
-
- for (i = 0; i < MAX_UIO_MAPS; i++) {
- if (info->mem[i].internal_addr)
- iounmap(info->mem[i].internal_addr);
- }
-}
-
-static int
-igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
-{
- int i, iom, iop, ret;
- unsigned long flags;
- static const char *bar_names[PCI_STD_RESOURCE_END + 1] = {
- "BAR0",
- "BAR1",
- "BAR2",
- "BAR3",
- "BAR4",
- "BAR5",
- };
-
- iom = 0;
- iop = 0;
-
- for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
- if (pci_resource_len(dev, i) != 0 &&
- pci_resource_start(dev, i) != 0) {
- flags = pci_resource_flags(dev, i);
- if (flags & IORESOURCE_MEM) {
- ret = igbuio_pci_setup_iomem(dev, info, iom,
- i, bar_names[i]);
- if (ret != 0)
- return ret;
- iom++;
- } else if (flags & IORESOURCE_IO) {
- ret = igbuio_pci_setup_ioport(dev, info, iop,
- i, bar_names[i]);
- if (ret != 0)
- return ret;
- iop++;
- }
- }
- }
-
- return (iom != 0 || iop != 0) ? ret : -ENOENT;
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
-static int __devinit
-#else
-static int
-#endif
-igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
- struct rte_uio_pci_dev *udev;
- dma_addr_t map_dma_addr;
- void *map_addr;
- int err;
-
- udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
- if (!udev)
- return -ENOMEM;
-
- mutex_init(&udev->lock);
- /*
- * enable device: ask low-level code to enable I/O and
- * memory
- */
- err = pci_enable_device(dev);
- if (err != 0) {
- dev_err(&dev->dev, "Cannot enable PCI device\n");
- goto fail_free;
- }
-
- /* enable bus mastering on the device */
- pci_set_master(dev);
-
- /* remap IO memory */
- err = igbuio_setup_bars(dev, &udev->info);
- if (err != 0)
- goto fail_release_iomem;
-
- /* set 64-bit DMA mask */
- err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
- if (err != 0) {
- dev_err(&dev->dev, "Cannot set DMA mask\n");
- goto fail_release_iomem;
- }
-
- err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
- if (err != 0) {
- dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
- goto fail_release_iomem;
- }
-
- /* fill uio infos */
- udev->info.name = "igb_uio";
- udev->info.version = "0.1";
- udev->info.irqcontrol = igbuio_pci_irqcontrol;
- udev->info.open = igbuio_pci_open;
- udev->info.release = igbuio_pci_release;
- udev->info.priv = udev;
- udev->pdev = dev;
-
- err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
- if (err != 0)
- goto fail_release_iomem;
-
- /* register uio driver */
- err = uio_register_device(&dev->dev, &udev->info);
- if (err != 0)
- goto fail_remove_group;
-
- pci_set_drvdata(dev, udev);
-
- /*
- * Doing a harmless dma mapping for attaching the device to
- * the iommu identity mapping if kernel boots with iommu=pt.
- * Note this is not a problem if no IOMMU at all.
- */
- map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr,
- GFP_KERNEL);
- if (map_addr)
- memset(map_addr, 0, 1024);
-
- if (!map_addr)
- dev_info(&dev->dev, "dma mapping failed\n");
- else {
- dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n",
- (unsigned long long)map_dma_addr, map_addr);
-
- dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr);
- dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n",
- (unsigned long long)map_dma_addr, map_addr);
- }
-
- return 0;
-
-fail_remove_group:
- sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
-fail_release_iomem:
- igbuio_pci_release_iomem(&udev->info);
- pci_disable_device(dev);
-fail_free:
- kfree(udev);
-
- return err;
-}
-
-static void
-igbuio_pci_remove(struct pci_dev *dev)
-{
- struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
-
- mutex_destroy(&udev->lock);
- sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
- uio_unregister_device(&udev->info);
- igbuio_pci_release_iomem(&udev->info);
- pci_disable_device(dev);
- pci_set_drvdata(dev, NULL);
- kfree(udev);
-}
-
-static int
-igbuio_config_intr_mode(char *intr_str)
-{
- if (!intr_str) {
- pr_info("Use MSIX interrupt by default\n");
- return 0;
- }
-
- if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) {
- igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
- pr_info("Use MSIX interrupt\n");
- } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) {
- igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI;
- pr_info("Use MSI interrupt\n");
- } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) {
- igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
- pr_info("Use legacy interrupt\n");
- } else {
- pr_info("Error: bad parameter - %s\n", intr_str);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static struct pci_driver igbuio_pci_driver = {
- .name = "igb_uio",
- .id_table = NULL,
- .probe = igbuio_pci_probe,
- .remove = igbuio_pci_remove,
-};
-
-static int __init
-igbuio_pci_init_module(void)
-{
- int ret;
-
- ret = igbuio_config_intr_mode(intr_mode);
- if (ret < 0)
- return ret;
-
- return pci_register_driver(&igbuio_pci_driver);
-}
-
-static void __exit
-igbuio_pci_exit_module(void)
-{
- pci_unregister_driver(&igbuio_pci_driver);
-}
-
-module_init(igbuio_pci_init_module);
-module_exit(igbuio_pci_exit_module);
-
-module_param(intr_mode, charp, S_IRUGO);
-MODULE_PARM_DESC(intr_mode,
-"igb_uio interrupt mode (default=msix):\n"
-" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
-" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n"
-" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
-"\n");
-
-MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
diff --git a/lib/librte_eal/linuxapp/igb_uio/meson.build b/lib/librte_eal/linuxapp/igb_uio/meson.build
deleted file mode 100644
index 257ef631..00000000
--- a/lib/librte_eal/linuxapp/igb_uio/meson.build
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
-
-kernel_dir = get_option('kernel_dir')
-if kernel_dir == ''
- kernel_version = run_command('uname', '-r').stdout().strip()
- kernel_dir = '/lib/modules/' + kernel_version + '/build'
-endif
-
-mkfile = custom_target('igb_uio_makefile',
- output: 'Makefile',
- command: ['touch', '@OUTPUT@'])
-
-custom_target('igb_uio',
- input: ['igb_uio.c', 'Kbuild'],
- output: 'igb_uio.ko',
- command: ['make', '-C', kernel_dir,
- 'M=' + meson.current_build_dir(),
- 'src=' + meson.current_source_dir(),
- 'EXTRA_CFLAGS=-I' + meson.current_source_dir() +
- '/../../common/include',
- 'modules'],
- depends: mkfile,
- build_by_default: get_option('enable_kmods'))
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
deleted file mode 100644
index 282be7b6..00000000
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = rte_kni
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-MODULE_CFLAGS += -Wall -Werror
-
--include /etc/lsb-release
-
-ifeq ($(DISTRIB_ID),Ubuntu)
-MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE))
-UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
- | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1)
-MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
-endif
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := kni_misc.c
-SRCS-y += kni_net.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += kni_ethtool.c
-
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_main.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_api.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_common.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_ethtool.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82599.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82598.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_x540.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_phy.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/kcompat.c
-
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_82575.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_i210.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_api.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mac.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_manage.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mbx.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_nvm.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_phy.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_ethtool.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_main.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_param.c
-SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_vmdq.c
-
-include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
deleted file mode 100644
index 3f8c0bc8..00000000
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Minimal wrappers to allow compiling kni on older kernels.
- */
-
-#include <linux/version.h>
-
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
-#endif
-
-/* SuSE version macro is the same as Linux kernel version */
-#ifndef SLE_VERSION
-#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c)
-#endif
-#ifdef CONFIG_SUSE_KERNEL
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
-/* SLES12SP3 is at least 4.4.57+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28))
-/* SLES12 is at least 3.12.28+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0)
-#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)))
-/* SLES11 SP3 is at least 3.0.61+ based */
-#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0)
-#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
-/* SLES11 SP1 is 2.6.32 based */
-#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0)
-#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27))
-/* SLES11 GA is 2.6.27 based */
-#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0)
-#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
-#endif /* CONFIG_SUSE_KERNEL */
-#ifndef SLE_VERSION_CODE
-#define SLE_VERSION_CODE 0
-#endif /* SLE_VERSION_CODE */
-
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
-
-#define kstrtoul strict_strtoul
-
-#endif /* < 2.6.39 */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33)
-#define HAVE_SIMPLIFIED_PERNET_OPERATIONS
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-#define sk_sleep(s) ((s)->sk_sleep)
-#else
-#define HAVE_SOCKET_WQ
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
-#define HAVE_STATIC_SOCK_MAP_FD
-#else
-#define kni_sock_map_fd(s) sock_map_fd(s, 0)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
-#define HAVE_CHANGE_CARRIER_CB
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
-#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
-#define HAVE_IOV_ITER_MSGHDR
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
-#define HAVE_KIOCB_MSG_PARAM
-#define HAVE_REBUILD_HEADER
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
-#define HAVE_SK_ALLOC_KERN_PARAM
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \
- (defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \
- (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0))
-#define HAVE_TRANS_START_HELPER
-#endif
-
-/*
- * KNI uses the NET_NAME_UNKNOWN macro to select the correct version of
- * alloc_netdev(). Some old kernels backported the commit that introduces the
- * macro (685343fc3ba6) but still use the old API; for them the macro must be
- * undefined so the correct version of the API is selected. This is safe since
- * KNI does not use the value.
- * This fix is specific to RedHat/CentOS kernels.
- */
-#if (defined(RHEL_RELEASE_CODE) && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
-#undef NET_NAME_UNKNOWN
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
-#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
-#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README
deleted file mode 100644
index af36738a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/README
+++ /dev/null
@@ -1,71 +0,0 @@
-.. SPDX-License-Identifier: BSD-3-Clause
- Copyright(c) 2010-2014 Intel Corporation.
-
-Description
-
-In order to support ethtool in the Kernel NIC Interface, the standard Linux
-kernel ixgbe/igb drivers need to be reused here. ixgbe-3.9.17 is the version
-modified for use in the kernel NIC interface kernel module to support ixgbe
-NICs, and igb-3.4.8 is the version modified for use in the kernel NIC
-interface kernel module to support igb NICs.
-
-The source code package of ixgbe can be downloaded from sourceforge.net at the
-link below.
-http://sourceforge.net/projects/e1000/files/ixgbe%20stable/
-The following source files are copied or modified from ixgbe.
-
-ixgbe_82598.h
-ixgbe_82599.c
-ixgbe_82599.h
-ixgbe_api.c
-ixgbe_api.h
-ixgbe_common.c
-ixgbe_common.h
-ixgbe_dcb.h
-ixgbe_ethtool.c
-ixgbe_fcoe.h
-ixgbe.h
-ixgbe_main.c
-ixgbe_mbx.h
-ixgbe_osdep.h
-ixgbe_phy.c
-ixgbe_phy.h
-ixgbe_sriov.h
-ixgbe_type.h
-kcompat.c
-kcompat.h
-
-The source code package of igb can be downloaded from sourceforge.net at the
-link below.
-http://sourceforge.net/projects/e1000/files/igb%20stable/
-The following source files are copied or modified from igb.
-
-e1000_82575.c
-e1000_82575.h
-e1000_api.c
-e1000_api.h
-e1000_defines.h
-e1000_hw.h
-e1000_mac.c
-e1000_mac.h
-e1000_manage.c
-e1000_manage.h
-e1000_mbx.c
-e1000_mbx.h
-e1000_nvm.c
-e1000_nvm.h
-e1000_osdep.h
-e1000_phy.c
-e1000_phy.h
-e1000_regs.h
-igb_ethtool.c
-igb.h
-igb_main.c
-igb_param.c
-igb_procfs.c
-igb_regtest.h
-igb_sysfs.c
-igb_vmdq.c
-igb_vmdq.h
-kcompat.c
-kcompat_ethtool.c
-kcompat.h
-
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
deleted file mode 100644
index 98346709..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
+++ /dev/null
@@ -1,3650 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/*
- * 82575EB Gigabit Network Connection
- * 82575EB Gigabit Backplane Connection
- * 82575GB Gigabit Network Connection
- * 82576 Gigabit Network Connection
- * 82576 Quad Port Gigabit Mezzanine Adapter
- * 82580 Gigabit Network Connection
- * I350 Gigabit Network Connection
- */
-
-#include "e1000_api.h"
-#include "e1000_i210.h"
-
-static s32 e1000_init_phy_params_82575(struct e1000_hw *hw);
-static s32 e1000_init_mac_params_82575(struct e1000_hw *hw);
-static s32 e1000_acquire_phy_82575(struct e1000_hw *hw);
-static void e1000_release_phy_82575(struct e1000_hw *hw);
-static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw);
-static void e1000_release_nvm_82575(struct e1000_hw *hw);
-static s32 e1000_check_for_link_82575(struct e1000_hw *hw);
-static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw);
-static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw);
-static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
- u16 *duplex);
-static s32 e1000_init_hw_82575(struct e1000_hw *hw);
-static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw);
-static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
- u16 *data);
-static s32 e1000_reset_hw_82575(struct e1000_hw *hw);
-static s32 e1000_reset_hw_82580(struct e1000_hw *hw);
-static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw,
- u32 offset, u16 *data);
-static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw,
- u32 offset, u16 data);
-static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw,
- bool active);
-static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw,
- bool active);
-static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw,
- bool active);
-static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw);
-static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw);
-static s32 e1000_get_media_type_82575(struct e1000_hw *hw);
-static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw);
-static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data);
-static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw,
- u32 offset, u16 data);
-static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw);
-static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
-static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
- u16 *speed, u16 *duplex);
-static s32 e1000_get_phy_id_82575(struct e1000_hw *hw);
-static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
-static bool e1000_sgmii_active_82575(struct e1000_hw *hw);
-static s32 e1000_reset_init_script_82575(struct e1000_hw *hw);
-static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw);
-static void e1000_config_collision_dist_82575(struct e1000_hw *hw);
-static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw);
-static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw);
-static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw);
-static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw);
-static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw);
-static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw);
-static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw);
-static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw,
- u16 offset);
-static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
- u16 offset);
-static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw);
-static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw);
-static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value);
-static void e1000_clear_vfta_i350(struct e1000_hw *hw);
-
-static void e1000_i2c_start(struct e1000_hw *hw);
-static void e1000_i2c_stop(struct e1000_hw *hw);
-static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data);
-static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data);
-static s32 e1000_get_i2c_ack(struct e1000_hw *hw);
-static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data);
-static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data);
-static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
-static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
-static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data);
-static bool e1000_get_i2c_data(u32 *i2cctl);
-
-static const u16 e1000_82580_rxpbs_table[] = {
- 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 };
-#define E1000_82580_RXPBS_TABLE_SIZE \
- (sizeof(e1000_82580_rxpbs_table)/sizeof(u16))
-
-
-/**
- * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO
- * @hw: pointer to the HW structure
- *
- * Called to determine if the I2C pins are being used for I2C or as an
- * external MDIO interface since the two options are mutually exclusive.
- **/
-static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw)
-{
- u32 reg = 0;
- bool ext_mdio = false;
-
- DEBUGFUNC("e1000_sgmii_uses_mdio_82575");
-
- switch (hw->mac.type) {
- case e1000_82575:
- case e1000_82576:
- reg = E1000_READ_REG(hw, E1000_MDIC);
- ext_mdio = !!(reg & E1000_MDIC_DEST);
- break;
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- reg = E1000_READ_REG(hw, E1000_MDICNFG);
- ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO);
- break;
- default:
- break;
- }
- return ext_mdio;
-}
-
-/**
- * e1000_init_phy_params_82575 - Init PHY func ptrs.
- * @hw: pointer to the HW structure
- **/
-static s32 e1000_init_phy_params_82575(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u32 ctrl_ext;
-
- DEBUGFUNC("e1000_init_phy_params_82575");
-
- phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic;
- phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic;
-
- if (hw->phy.media_type != e1000_media_type_copper) {
- phy->type = e1000_phy_none;
- goto out;
- }
-
- phy->ops.power_up = e1000_power_up_phy_copper;
- phy->ops.power_down = e1000_power_down_phy_copper_82575;
-
- phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
- phy->reset_delay_us = 100;
-
- phy->ops.acquire = e1000_acquire_phy_82575;
- phy->ops.check_reset_block = e1000_check_reset_block_generic;
- phy->ops.commit = e1000_phy_sw_reset_generic;
- phy->ops.get_cfg_done = e1000_get_cfg_done_82575;
- phy->ops.release = e1000_release_phy_82575;
-
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
-
- if (e1000_sgmii_active_82575(hw)) {
- phy->ops.reset = e1000_phy_hw_reset_sgmii_82575;
- ctrl_ext |= E1000_CTRL_I2C_ENA;
- } else {
- phy->ops.reset = e1000_phy_hw_reset_generic;
- ctrl_ext &= ~E1000_CTRL_I2C_ENA;
- }
-
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
- e1000_reset_mdicnfg_82580(hw);
-
- if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) {
- phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575;
- phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575;
- } else {
- switch (hw->mac.type) {
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- phy->ops.read_reg = e1000_read_phy_reg_82580;
- phy->ops.write_reg = e1000_write_phy_reg_82580;
- break;
- case e1000_i210:
- case e1000_i211:
- phy->ops.read_reg = e1000_read_phy_reg_gs40g;
- phy->ops.write_reg = e1000_write_phy_reg_gs40g;
- break;
- default:
- phy->ops.read_reg = e1000_read_phy_reg_igp;
- phy->ops.write_reg = e1000_write_phy_reg_igp;
- }
- }
-
- /* Set phy->phy_addr and phy->id. */
- ret_val = e1000_get_phy_id_82575(hw);
-
- /* Verify phy id and set remaining function pointers */
- switch (phy->id) {
- case M88E1543_E_PHY_ID:
- case I347AT4_E_PHY_ID:
- case M88E1112_E_PHY_ID:
- case M88E1340M_E_PHY_ID:
- case M88E1111_I_PHY_ID:
- phy->type = e1000_phy_m88;
- phy->ops.check_polarity = e1000_check_polarity_m88;
- phy->ops.get_info = e1000_get_phy_info_m88;
- if (phy->id == I347AT4_E_PHY_ID ||
- phy->id == M88E1112_E_PHY_ID ||
- phy->id == M88E1340M_E_PHY_ID)
- phy->ops.get_cable_length =
- e1000_get_cable_length_m88_gen2;
- else if (phy->id == M88E1543_E_PHY_ID)
- phy->ops.get_cable_length =
- e1000_get_cable_length_m88_gen2;
- else
- phy->ops.get_cable_length = e1000_get_cable_length_m88;
- phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
- /* Check if this PHY is configured for media swap. */
- if (phy->id == M88E1112_E_PHY_ID) {
- u16 data;
-
- ret_val = phy->ops.write_reg(hw,
- E1000_M88E1112_PAGE_ADDR,
- 2);
- if (ret_val)
- goto out;
-
- ret_val = phy->ops.read_reg(hw,
- E1000_M88E1112_MAC_CTRL_1,
- &data);
- if (ret_val)
- goto out;
-
- data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
- E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
- if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
- data == E1000_M88E1112_AUTO_COPPER_BASEX)
- hw->mac.ops.check_for_link =
- e1000_check_for_link_media_swap;
- }
- break;
- case IGP03E1000_E_PHY_ID:
- case IGP04E1000_E_PHY_ID:
- phy->type = e1000_phy_igp_3;
- phy->ops.check_polarity = e1000_check_polarity_igp;
- phy->ops.get_info = e1000_get_phy_info_igp;
- phy->ops.get_cable_length = e1000_get_cable_length_igp_2;
- phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp;
- phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575;
- phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic;
- break;
- case I82580_I_PHY_ID:
- case I350_I_PHY_ID:
- phy->type = e1000_phy_82580;
- phy->ops.check_polarity = e1000_check_polarity_82577;
- phy->ops.force_speed_duplex =
- e1000_phy_force_speed_duplex_82577;
- phy->ops.get_cable_length = e1000_get_cable_length_82577;
- phy->ops.get_info = e1000_get_phy_info_82577;
- phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
- phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
- break;
- case I210_I_PHY_ID:
- phy->type = e1000_phy_i210;
- phy->ops.check_polarity = e1000_check_polarity_m88;
- phy->ops.get_info = e1000_get_phy_info_m88;
- phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2;
- phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
- phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
- phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
- break;
- default:
- ret_val = -E1000_ERR_PHY;
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_init_nvm_params_82575 - Init NVM func ptrs.
- * @hw: pointer to the HW structure
- **/
-s32 e1000_init_nvm_params_82575(struct e1000_hw *hw)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- u16 size;
-
- DEBUGFUNC("e1000_init_nvm_params_82575");
-
- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
- E1000_EECD_SIZE_EX_SHIFT);
- /*
- * Added to a constant, "size" becomes the left-shift value
- * for setting word_size.
- */
- size += NVM_WORD_SIZE_BASE_SHIFT;
-
- /* Just in case size is out of range, cap it to the largest
- * EEPROM size supported
- */
- if (size > 15)
- size = 15;
-
- nvm->word_size = 1 << size;
- if (hw->mac.type < e1000_i210) {
- nvm->opcode_bits = 8;
- nvm->delay_usec = 1;
-
- switch (nvm->override) {
- case e1000_nvm_override_spi_large:
- nvm->page_size = 32;
- nvm->address_bits = 16;
- break;
- case e1000_nvm_override_spi_small:
- nvm->page_size = 8;
- nvm->address_bits = 8;
- break;
- default:
- nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
- nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
- 16 : 8;
- break;
- }
- if (nvm->word_size == (1 << 15))
- nvm->page_size = 128;
-
- nvm->type = e1000_nvm_eeprom_spi;
- } else {
- nvm->type = e1000_nvm_flash_hw;
- }
-
- /* Function Pointers */
- nvm->ops.acquire = e1000_acquire_nvm_82575;
- nvm->ops.release = e1000_release_nvm_82575;
- if (nvm->word_size < (1 << 15))
- nvm->ops.read = e1000_read_nvm_eerd;
- else
- nvm->ops.read = e1000_read_nvm_spi;
-
- nvm->ops.write = e1000_write_nvm_spi;
- nvm->ops.validate = e1000_validate_nvm_checksum_generic;
- nvm->ops.update = e1000_update_nvm_checksum_generic;
- nvm->ops.valid_led_default = e1000_valid_led_default_82575;
-
- /* override generic family function pointers for specific descendants */
- switch (hw->mac.type) {
- case e1000_82580:
- nvm->ops.validate = e1000_validate_nvm_checksum_82580;
- nvm->ops.update = e1000_update_nvm_checksum_82580;
- break;
- case e1000_i350:
- //case e1000_i354:
- nvm->ops.validate = e1000_validate_nvm_checksum_i350;
- nvm->ops.update = e1000_update_nvm_checksum_i350;
- break;
- default:
- break;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_init_mac_params_82575 - Init MAC func ptrs.
- * @hw: pointer to the HW structure
- **/
-static s32 e1000_init_mac_params_82575(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
-
- DEBUGFUNC("e1000_init_mac_params_82575");
-
- /* Derives media type */
- e1000_get_media_type_82575(hw);
- /* Set mta register count */
- mac->mta_reg_count = 128;
- /* Set uta register count */
- mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128;
- /* Set rar entry count */
- mac->rar_entry_count = E1000_RAR_ENTRIES_82575;
- if (mac->type == e1000_82576)
- mac->rar_entry_count = E1000_RAR_ENTRIES_82576;
- if (mac->type == e1000_82580)
- mac->rar_entry_count = E1000_RAR_ENTRIES_82580;
- if (mac->type == e1000_i350 || mac->type == e1000_i354)
- mac->rar_entry_count = E1000_RAR_ENTRIES_I350;
-
- /* Enable EEE default settings for EEE supported devices */
- if (mac->type >= e1000_i350)
- dev_spec->eee_disable = false;
-
- /* Allow a single clear of the SW semaphore on I210 and newer */
- if (mac->type >= e1000_i210)
- dev_spec->clear_semaphore_once = true;
-
- /* Set if part includes ASF firmware */
- mac->asf_firmware_present = true;
- /* FWSM register */
- mac->has_fwsm = true;
- /* ARC supported; valid only if manageability features are enabled. */
- mac->arc_subsystem_valid =
- !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK);
-
- /* Function pointers */
-
- /* bus type/speed/width */
- mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic;
- /* reset */
- if (mac->type >= e1000_82580)
- mac->ops.reset_hw = e1000_reset_hw_82580;
- else
- mac->ops.reset_hw = e1000_reset_hw_82575;
- /* hw initialization */
- mac->ops.init_hw = e1000_init_hw_82575;
- /* link setup */
- mac->ops.setup_link = e1000_setup_link_generic;
- /* physical interface link setup */
- mac->ops.setup_physical_interface =
- (hw->phy.media_type == e1000_media_type_copper)
- ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575;
- /* physical interface shutdown */
- mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575;
- /* physical interface power up */
- mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575;
- /* check for link */
- mac->ops.check_for_link = e1000_check_for_link_82575;
- /* read mac address */
- mac->ops.read_mac_addr = e1000_read_mac_addr_82575;
- /* configure collision distance */
- mac->ops.config_collision_dist = e1000_config_collision_dist_82575;
- /* multicast address update */
- mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
- if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) {
- /* writing VFTA */
- mac->ops.write_vfta = e1000_write_vfta_i350;
- /* clearing VFTA */
- mac->ops.clear_vfta = e1000_clear_vfta_i350;
- } else {
- /* writing VFTA */
- mac->ops.write_vfta = e1000_write_vfta_generic;
- /* clearing VFTA */
- mac->ops.clear_vfta = e1000_clear_vfta_generic;
- }
- if (hw->mac.type >= e1000_82580)
- mac->ops.validate_mdi_setting =
- e1000_validate_mdi_setting_crossover_generic;
- /* ID LED init */
- mac->ops.id_led_init = e1000_id_led_init_generic;
- /* blink LED */
- mac->ops.blink_led = e1000_blink_led_generic;
- /* setup LED */
- mac->ops.setup_led = e1000_setup_led_generic;
- /* cleanup LED */
- mac->ops.cleanup_led = e1000_cleanup_led_generic;
- /* turn on/off LED */
- mac->ops.led_on = e1000_led_on_generic;
- mac->ops.led_off = e1000_led_off_generic;
- /* clear hardware counters */
- mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575;
- /* link info */
- mac->ops.get_link_up_info = e1000_get_link_up_info_82575;
- /* get thermal sensor data */
- mac->ops.get_thermal_sensor_data =
- e1000_get_thermal_sensor_data_generic;
- mac->ops.init_thermal_sensor_thresh =
- e1000_init_thermal_sensor_thresh_generic;
- /* acquire SW_FW sync */
- mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575;
- mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575;
- if (mac->type >= e1000_i210) {
- mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210;
- mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210;
- }
-
- /* set lan id for port to determine which phy lock to use */
- hw->mac.ops.set_lan_id(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_init_function_pointers_82575 - Init func ptrs.
- * @hw: pointer to the HW structure
- *
- * Called to initialize all function pointers and parameters.
- **/
-void e1000_init_function_pointers_82575(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_init_function_pointers_82575");
-
- hw->mac.ops.init_params = e1000_init_mac_params_82575;
- hw->nvm.ops.init_params = e1000_init_nvm_params_82575;
- hw->phy.ops.init_params = e1000_init_phy_params_82575;
- hw->mbx.ops.init_params = e1000_init_mbx_params_pf;
-}
-
-/**
- * e1000_acquire_phy_82575 - Acquire rights to access PHY
- * @hw: pointer to the HW structure
- *
- * Acquire access rights to the correct PHY.
- **/
-static s32 e1000_acquire_phy_82575(struct e1000_hw *hw)
-{
- u16 mask = E1000_SWFW_PHY0_SM;
-
- DEBUGFUNC("e1000_acquire_phy_82575");
-
- if (hw->bus.func == E1000_FUNC_1)
- mask = E1000_SWFW_PHY1_SM;
- else if (hw->bus.func == E1000_FUNC_2)
- mask = E1000_SWFW_PHY2_SM;
- else if (hw->bus.func == E1000_FUNC_3)
- mask = E1000_SWFW_PHY3_SM;
-
- return hw->mac.ops.acquire_swfw_sync(hw, mask);
-}
-
-/**
- * e1000_release_phy_82575 - Release rights to access PHY
- * @hw: pointer to the HW structure
- *
- * A wrapper to release access rights to the correct PHY.
- **/
-static void e1000_release_phy_82575(struct e1000_hw *hw)
-{
- u16 mask = E1000_SWFW_PHY0_SM;
-
- DEBUGFUNC("e1000_release_phy_82575");
-
- if (hw->bus.func == E1000_FUNC_1)
- mask = E1000_SWFW_PHY1_SM;
- else if (hw->bus.func == E1000_FUNC_2)
- mask = E1000_SWFW_PHY2_SM;
- else if (hw->bus.func == E1000_FUNC_3)
- mask = E1000_SWFW_PHY3_SM;
-
- hw->mac.ops.release_swfw_sync(hw, mask);
-}
-
-/**
- * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the PHY register at offset using the serial gigabit media independent
- * interface and stores the retrieved information in data.
- **/
-static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
- u16 *data)
-{
- s32 ret_val = -E1000_ERR_PARAM;
-
- DEBUGFUNC("e1000_read_phy_reg_sgmii_82575");
-
- if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
- DEBUGOUT1("PHY Address %u is out of range\n", offset);
- goto out;
- }
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_read_phy_reg_i2c(hw, offset, data);
-
- hw->phy.ops.release(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Writes the data to PHY register at the offset using the serial gigabit
- * media independent interface.
- **/
-static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
- u16 data)
-{
- s32 ret_val = -E1000_ERR_PARAM;
-
- DEBUGFUNC("e1000_write_phy_reg_sgmii_82575");
-
- if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
-		DEBUGOUT1("PHY Address %u is out of range\n", offset);
- goto out;
- }
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_write_phy_reg_i2c(hw, offset, data);
-
- hw->phy.ops.release(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_get_phy_id_82575 - Retrieve PHY addr and id
- * @hw: pointer to the HW structure
- *
- * Retrieves the PHY address and ID for both PHYs which do and do not use the
- * sgmii interface.
- **/
-static s32 e1000_get_phy_id_82575(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u16 phy_id;
- u32 ctrl_ext;
- u32 mdic;
-
- DEBUGFUNC("e1000_get_phy_id_82575");
-
- /* i354 devices can have a PHY that needs an extra read for id */
- if (hw->mac.type == e1000_i354)
- e1000_get_phy_id(hw);
-
-
- /*
- * For SGMII PHYs, we try the list of possible addresses until
- * we find one that works. For non-SGMII PHYs
- * (e.g. integrated copper PHYs), an address of 1 should
- * work. The result of this function should mean phy->phy_addr
- * and phy->id are set correctly.
- */
- if (!e1000_sgmii_active_82575(hw)) {
- phy->addr = 1;
- ret_val = e1000_get_phy_id(hw);
- goto out;
- }
-
- if (e1000_sgmii_uses_mdio_82575(hw)) {
- switch (hw->mac.type) {
- case e1000_82575:
- case e1000_82576:
- mdic = E1000_READ_REG(hw, E1000_MDIC);
- mdic &= E1000_MDIC_PHY_MASK;
- phy->addr = mdic >> E1000_MDIC_PHY_SHIFT;
- break;
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- mdic = E1000_READ_REG(hw, E1000_MDICNFG);
- mdic &= E1000_MDICNFG_PHY_MASK;
- phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT;
- break;
- default:
- ret_val = -E1000_ERR_PHY;
- goto out;
- break;
- }
- ret_val = e1000_get_phy_id(hw);
- goto out;
- }
-
- /* Power on sgmii phy if it is disabled */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- E1000_WRITE_REG(hw, E1000_CTRL_EXT,
- ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA);
- E1000_WRITE_FLUSH(hw);
- msec_delay(300);
-
- /*
- * The address field in the I2CCMD register is 3 bits and 0 is invalid.
- * Therefore, we need to test 1-7
- */
- for (phy->addr = 1; phy->addr < 8; phy->addr++) {
- ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id);
- if (ret_val == E1000_SUCCESS) {
- DEBUGOUT2("Vendor ID 0x%08X read at address %u\n",
- phy_id, phy->addr);
- /*
-			 * At the time of this writing, the M88 part is
- * the only supported SGMII PHY product.
- */
- if (phy_id == M88_VENDOR)
- break;
- } else {
- DEBUGOUT1("PHY address %u was unreadable\n",
- phy->addr);
- }
- }
-
- /* A valid PHY type couldn't be found. */
- if (phy->addr == 8) {
- phy->addr = 0;
- ret_val = -E1000_ERR_PHY;
- } else {
- ret_val = e1000_get_phy_id(hw);
- }
-
- /* restore previous sfp cage power state */
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset
- * @hw: pointer to the HW structure
- *
- * Resets the PHY using the serial gigabit media independent interface.
- **/
-static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575");
-
- /*
- * This isn't a true "hard" reset, but is the only reset
- * available to us at this time.
- */
-
- DEBUGOUT("Soft resetting SGMII attached PHY...\n");
-
- if (!(hw->phy.ops.write_reg))
- goto out;
-
- /*
-	 * SFP documentation requires the following to configure the SFP module
- * to work on SGMII. No further documentation is given.
- */
- ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084);
- if (ret_val)
- goto out;
-
- ret_val = hw->phy.ops.commit(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state
- * @hw: pointer to the HW structure
- * @active: true to enable LPLU, false to disable
- *
- * Sets the LPLU D0 state according to the active flag. When
- * activating LPLU this function also disables smart speed
- * and vice versa. LPLU will not be activated unless the
- * device autonegotiation advertisement meets standards of
- * either 10 or 10/100 or 10/100/1000 at all duplexes.
- * This is a function pointer entry point only called by
- * PHY setup routines.
- **/
-static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u16 data;
-
- DEBUGFUNC("e1000_set_d0_lplu_state_82575");
-
- if (!(hw->phy.ops.read_reg))
- goto out;
-
- ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
- if (ret_val)
- goto out;
-
- if (active) {
- data |= IGP02E1000_PM_D0_LPLU;
- ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
- data);
- if (ret_val)
- goto out;
-
- /* When LPLU is enabled, we should disable SmartSpeed */
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
- &data);
- data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- goto out;
- } else {
- data &= ~IGP02E1000_PM_D0_LPLU;
- ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
- data);
- /*
- * LPLU and SmartSpeed are mutually exclusive. LPLU is used
- * during Dx states where the power conservation is most
- * important. During driver activity we should enable
- * SmartSpeed, so performance is maintained.
- */
- if (phy->smart_speed == e1000_smart_speed_on) {
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- goto out;
-
- data |= IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- goto out;
- } else if (phy->smart_speed == e1000_smart_speed_off) {
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- goto out;
-
- data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- goto out;
- }
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state
- * @hw: pointer to the HW structure
- * @active: true to enable LPLU, false to disable
- *
- * Sets the LPLU D0 state according to the active flag. When
- * activating LPLU this function also disables smart speed
- * and vice versa. LPLU will not be activated unless the
- * device autonegotiation advertisement meets standards of
- * either 10 or 10/100 or 10/100/1000 at all duplexes.
- * This is a function pointer entry point only called by
- * PHY setup routines.
- **/
-static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u32 data;
-
- DEBUGFUNC("e1000_set_d0_lplu_state_82580");
-
- data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
-
- if (active) {
- data |= E1000_82580_PM_D0_LPLU;
-
- /* When LPLU is enabled, we should disable SmartSpeed */
- data &= ~E1000_82580_PM_SPD;
- } else {
- data &= ~E1000_82580_PM_D0_LPLU;
-
- /*
- * LPLU and SmartSpeed are mutually exclusive. LPLU is used
- * during Dx states where the power conservation is most
- * important. During driver activity we should enable
- * SmartSpeed, so performance is maintained.
- */
- if (phy->smart_speed == e1000_smart_speed_on)
- data |= E1000_82580_PM_SPD;
- else if (phy->smart_speed == e1000_smart_speed_off)
- data &= ~E1000_82580_PM_SPD;
- }
-
- E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
- return ret_val;
-}
-
-/**
- * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3
- * @hw: pointer to the HW structure
- * @active: boolean used to enable/disable lplu
- *
- * Success returns 0, Failure returns 1
- *
- * The low power link up (lplu) state is set to the power management level D3
- * and SmartSpeed is disabled when active is true, else clear lplu for D3
- * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
- * is used during Dx states where the power conservation is most important.
- * During driver activity, SmartSpeed should be enabled so performance is
- * maintained.
- **/
-s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u32 data;
-
- DEBUGFUNC("e1000_set_d3_lplu_state_82580");
-
- data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
-
- if (!active) {
- data &= ~E1000_82580_PM_D3_LPLU;
- /*
- * LPLU and SmartSpeed are mutually exclusive. LPLU is used
- * during Dx states where the power conservation is most
- * important. During driver activity we should enable
- * SmartSpeed, so performance is maintained.
- */
- if (phy->smart_speed == e1000_smart_speed_on)
- data |= E1000_82580_PM_SPD;
- else if (phy->smart_speed == e1000_smart_speed_off)
- data &= ~E1000_82580_PM_SPD;
- } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
- (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
- (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
- data |= E1000_82580_PM_D3_LPLU;
- /* When LPLU is enabled, we should disable SmartSpeed */
- data &= ~E1000_82580_PM_SPD;
- }
-
- E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
- return ret_val;
-}
-
-/**
- * e1000_acquire_nvm_82575 - Request for access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Acquire the necessary semaphores for exclusive access to the EEPROM.
- * Set the EEPROM access request bit and wait for EEPROM access grant bit.
- * Return successful if access grant bit set, else clear the request for
- * EEPROM access and return -E1000_ERR_NVM (-1).
- **/
-static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_acquire_nvm_82575");
-
- ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
- if (ret_val)
- goto out;
-
- /*
-	 * Check if a previous access left an error
-	 * that this access could run into
- */
- if (hw->mac.type == e1000_i350) {
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT |
- E1000_EECD_TIMEOUT)) {
- /* Clear all access error flags */
- E1000_WRITE_REG(hw, E1000_EECD, eecd |
- E1000_EECD_ERROR_CLR);
- DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
- }
- }
- if (hw->mac.type == e1000_82580) {
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- if (eecd & E1000_EECD_BLOCKED) {
- /* Clear access error flag */
- E1000_WRITE_REG(hw, E1000_EECD, eecd |
- E1000_EECD_BLOCKED);
- DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
- }
- }
-
-
- ret_val = e1000_acquire_nvm_generic(hw);
- if (ret_val)
- e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_release_nvm_82575 - Release exclusive access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Stop any current commands to the EEPROM and clear the EEPROM request bit,
- * then release the semaphores acquired.
- **/
-static void e1000_release_nvm_82575(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_release_nvm_82575");
-
- e1000_release_nvm_generic(hw);
-
- e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
-}
-
-/**
- * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
- * will also specify which port we're acquiring the lock for.
- **/
-static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
-{
- u32 swfw_sync;
- u32 swmask = mask;
- u32 fwmask = mask << 16;
- s32 ret_val = E1000_SUCCESS;
- s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
-
- DEBUGFUNC("e1000_acquire_swfw_sync_82575");
-
- while (i < timeout) {
- if (e1000_get_hw_semaphore_generic(hw)) {
- ret_val = -E1000_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
- if (!(swfw_sync & (fwmask | swmask)))
- break;
-
- /*
- * Firmware currently using resource (fwmask)
- * or other software thread using resource (swmask)
- */
- e1000_put_hw_semaphore_generic(hw);
- msec_delay_irq(5);
- i++;
- }
-
- if (i == timeout) {
- DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
- ret_val = -E1000_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync |= swmask;
- E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
- e1000_put_hw_semaphore_generic(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_release_swfw_sync_82575 - Release SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Release the SW/FW semaphore used to access the PHY or NVM. The mask
- * will also specify which port we're releasing the lock for.
- **/
-static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
-{
- u32 swfw_sync;
-
- DEBUGFUNC("e1000_release_swfw_sync_82575");
-
- while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS)
- ; /* Empty */
-
- swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
- swfw_sync &= ~mask;
- E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
- e1000_put_hw_semaphore_generic(hw);
-}
-
-/**
- * e1000_get_cfg_done_82575 - Read config done bit
- * @hw: pointer to the HW structure
- *
- * Read the management control register for the config done bit for
- * completion status. NOTE: silicon which is EEPROM-less will fail trying
- * to read the config done bit, so the error is *ONLY* logged and
- * E1000_SUCCESS is returned.  If we were to return with an error, EEPROM-less
- * silicon would not be able to be reset or change link.
- **/
-static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw)
-{
- s32 timeout = PHY_CFG_TIMEOUT;
- s32 ret_val = E1000_SUCCESS;
- u32 mask = E1000_NVM_CFG_DONE_PORT_0;
-
- DEBUGFUNC("e1000_get_cfg_done_82575");
-
- if (hw->bus.func == E1000_FUNC_1)
- mask = E1000_NVM_CFG_DONE_PORT_1;
- else if (hw->bus.func == E1000_FUNC_2)
- mask = E1000_NVM_CFG_DONE_PORT_2;
- else if (hw->bus.func == E1000_FUNC_3)
- mask = E1000_NVM_CFG_DONE_PORT_3;
- while (timeout) {
- if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask)
- break;
- msec_delay(1);
- timeout--;
- }
- if (!timeout)
- DEBUGOUT("MNG configuration cycle has not completed.\n");
-
- /* If EEPROM is not marked present, init the PHY manually */
- if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) &&
- (hw->phy.type == e1000_phy_igp_3))
- e1000_phy_init_script_igp3(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_get_link_up_info_82575 - Get link speed/duplex info
- * @hw: pointer to the HW structure
- * @speed: stores the current speed
- * @duplex: stores the current duplex
- *
- * This is a wrapper function. If using the serial gigabit media independent
- * interface, PCS is used to retrieve the link speed and duplex information.
- * Otherwise, the generic function is used to get the link speed and duplex info.
- **/
-static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
- u16 *duplex)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_get_link_up_info_82575");
-
- if (hw->phy.media_type != e1000_media_type_copper)
- ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed,
- duplex);
- else
- ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed,
- duplex);
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_link_82575 - Check for link
- * @hw: pointer to the HW structure
- *
- * If sgmii is enabled, then use the pcs register to determine link, otherwise
- * use the generic interface for determining link.
- **/
-static s32 e1000_check_for_link_82575(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 speed, duplex;
-
- DEBUGFUNC("e1000_check_for_link_82575");
-
- if (hw->phy.media_type != e1000_media_type_copper) {
- ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed,
- &duplex);
- /*
- * Use this flag to determine if link needs to be checked or
-		 * not.  If we have link, clear the flag so that we do not
- * continue to check for link.
- */
- hw->mac.get_link_status = !hw->mac.serdes_has_link;
-
- /*
- * Configure Flow Control now that Auto-Neg has completed.
- * First, we need to restore the desired flow control
- * settings because we may have had to re-autoneg with a
- * different link partner.
- */
- ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val)
- DEBUGOUT("Error configuring flow control\n");
- } else {
- ret_val = e1000_check_for_copper_link_generic(hw);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_link_media_swap - Check which M88E1112 interface linked
- * @hw: pointer to the HW structure
- *
- * Poll the M88E1112 interfaces to see which interface achieved link.
- */
-static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
- u8 port = 0;
-
- DEBUGFUNC("e1000_check_for_link_media_swap");
-
- /* Check the copper medium. */
- ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- if (data & E1000_M88E1112_STATUS_LINK)
- port = E1000_MEDIA_PORT_COPPER;
-
- /* Check the other medium. */
- ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- if (data & E1000_M88E1112_STATUS_LINK)
- port = E1000_MEDIA_PORT_OTHER;
-
- /* Determine if a swap needs to happen. */
- if (port && (hw->dev_spec._82575.media_port != port)) {
- hw->dev_spec._82575.media_port = port;
- hw->dev_spec._82575.media_changed = true;
- } else {
- ret_val = e1000_check_for_link_82575(hw);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown
- * @hw: pointer to the HW structure
- **/
-static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw)
-{
- u32 reg;
-
- DEBUGFUNC("e1000_power_up_serdes_link_82575");
-
- if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
- !e1000_sgmii_active_82575(hw))
- return;
-
- /* Enable PCS to turn on link */
- reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
- reg |= E1000_PCS_CFG_PCS_EN;
- E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
-
- /* Power up the laser */
- reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
- reg &= ~E1000_CTRL_EXT_SDP3_DATA;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
-
- /* flush the write to verify completion */
- E1000_WRITE_FLUSH(hw);
- msec_delay(1);
-}
-
-/**
- * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex
- * @hw: pointer to the HW structure
- * @speed: stores the current speed
- * @duplex: stores the current duplex
- *
- * Using the physical coding sub-layer (PCS), retrieve the current speed and
- * duplex, then store the values in the pointers provided.
- **/
-static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
- u16 *speed, u16 *duplex)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 pcs;
- u32 status;
-
- DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575");
-
- /*
- * Read the PCS Status register for link state. For non-copper mode,
- * the status register is not accurate. The PCS status register is
- * used instead.
- */
- pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT);
-
- /*
- * The link up bit determines when link is up on autoneg.
- */
- if (pcs & E1000_PCS_LSTS_LINK_OK) {
- mac->serdes_has_link = true;
-
- /* Detect and store PCS speed */
- if (pcs & E1000_PCS_LSTS_SPEED_1000)
- *speed = SPEED_1000;
- else if (pcs & E1000_PCS_LSTS_SPEED_100)
- *speed = SPEED_100;
- else
- *speed = SPEED_10;
-
- /* Detect and store PCS duplex */
- if (pcs & E1000_PCS_LSTS_DUPLEX_FULL)
- *duplex = FULL_DUPLEX;
- else
- *duplex = HALF_DUPLEX;
-
- /* Check if it is an I354 2.5Gb backplane connection. */
- if (mac->type == e1000_i354) {
- status = E1000_READ_REG(hw, E1000_STATUS);
- if ((status & E1000_STATUS_2P5_SKU) &&
- !(status & E1000_STATUS_2P5_SKU_OVER)) {
- *speed = SPEED_2500;
- *duplex = FULL_DUPLEX;
- DEBUGOUT("2500 Mbs, ");
- DEBUGOUT("Full Duplex\n");
- }
- }
-
- } else {
- mac->serdes_has_link = false;
- *speed = 0;
- *duplex = 0;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_shutdown_serdes_link_82575 - Remove link during power down
- * @hw: pointer to the HW structure
- *
- * In the case of serdes, shut down the SFP and PCS on driver unload
- * when management pass-through is not enabled.
- **/
-void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw)
-{
- u32 reg;
-
- DEBUGFUNC("e1000_shutdown_serdes_link_82575");
-
- if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
- !e1000_sgmii_active_82575(hw))
- return;
-
- if (!e1000_enable_mng_pass_thru(hw)) {
- /* Disable PCS to turn off link */
- reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
- reg &= ~E1000_PCS_CFG_PCS_EN;
- E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
-
- /* shutdown the laser */
- reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
- reg |= E1000_CTRL_EXT_SDP3_DATA;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
-
- /* flush the write to verify completion */
- E1000_WRITE_FLUSH(hw);
- msec_delay(1);
- }
-
- return;
-}
-
-/**
- * e1000_reset_hw_82575 - Reset hardware
- * @hw: pointer to the HW structure
- *
- * This resets the hardware into a known state.
- **/
-static s32 e1000_reset_hw_82575(struct e1000_hw *hw)
-{
- u32 ctrl;
- s32 ret_val;
-
- DEBUGFUNC("e1000_reset_hw_82575");
-
- /*
- * Prevent the PCI-E bus from sticking if there is no TLP connection
- * on the last TLP read/write transaction when MAC is reset.
- */
- ret_val = e1000_disable_pcie_master_generic(hw);
- if (ret_val)
- DEBUGOUT("PCI-E Master disable polling has failed.\n");
-
- /* set the completion timeout for interface */
- ret_val = e1000_set_pcie_completion_timeout(hw);
- if (ret_val)
- DEBUGOUT("PCI-E Set completion timeout has failed.\n");
-
- DEBUGOUT("Masking off all interrupts\n");
- E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
-
- E1000_WRITE_REG(hw, E1000_RCTL, 0);
- E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
- E1000_WRITE_FLUSH(hw);
-
- msec_delay(10);
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
-
- DEBUGOUT("Issuing a global reset to MAC\n");
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST);
-
- ret_val = e1000_get_auto_rd_done_generic(hw);
- if (ret_val) {
- /*
- * When auto config read does not complete, do not
- * return with an error. This can happen in situations
- * where there is no eeprom and prevents getting link.
- */
- DEBUGOUT("Auto Read Done did not complete\n");
- }
-
- /* If EEPROM is not present, run manual init scripts */
- if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES))
- e1000_reset_init_script_82575(hw);
-
- /* Clear any pending interrupt events. */
- E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
- E1000_READ_REG(hw, E1000_ICR);
-
- /* Install any alternate MAC address into RAR0 */
- ret_val = e1000_check_alt_mac_addr_generic(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_init_hw_82575 - Initialize hardware
- * @hw: pointer to the HW structure
- *
- * This inits the hardware readying it for operation.
- **/
-static s32 e1000_init_hw_82575(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val;
- u16 i, rar_count = mac->rar_entry_count;
-
- DEBUGFUNC("e1000_init_hw_82575");
-
- /* Initialize identification LED */
- ret_val = mac->ops.id_led_init(hw);
- if (ret_val) {
- DEBUGOUT("Error initializing identification LED\n");
- /* This is not fatal and we should not stop init due to this */
- }
-
- /* Disabling VLAN filtering */
- DEBUGOUT("Initializing the IEEE VLAN\n");
- mac->ops.clear_vfta(hw);
-
- /* Setup the receive address */
- e1000_init_rx_addrs_generic(hw, rar_count);
-
- /* Zero out the Multicast HASH table */
- DEBUGOUT("Zeroing the MTA\n");
- for (i = 0; i < mac->mta_reg_count; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
-
- /* Zero out the Unicast HASH table */
- DEBUGOUT("Zeroing the UTA\n");
- for (i = 0; i < mac->uta_reg_count; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0);
-
- /* Setup link and flow control */
- ret_val = mac->ops.setup_link(hw);
-
- /* Set the default MTU size */
- hw->dev_spec._82575.mtu = 1500;
-
- /*
- * Clear all of the statistics registers (clear on read). It is
- * important that we do this after we have tried to establish link
- * because the symbol error count will increment wildly if there
- * is no link.
- */
- e1000_clear_hw_cntrs_82575(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_setup_copper_link_82575 - Configure copper link settings
- * @hw: pointer to the HW structure
- *
- * Configures the link for auto-neg or forced speed and duplex. Then we check
- * for link, once link is established calls to configure collision distance
- * and flow control are called.
- **/
-static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw)
-{
- u32 ctrl;
- s32 ret_val;
- u32 phpm_reg;
-
- DEBUGFUNC("e1000_setup_copper_link_82575");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= E1000_CTRL_SLU;
- ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- /* Clear Go Link Disconnect bit on supported devices */
- switch (hw->mac.type) {
- case e1000_82580:
- case e1000_i350:
- case e1000_i210:
- case e1000_i211:
- phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
- phpm_reg &= ~E1000_82580_PM_GO_LINKD;
- E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg);
- break;
- default:
- break;
- }
-
- ret_val = e1000_setup_serdes_link_82575(hw);
- if (ret_val)
- goto out;
-
- if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) {
-		/* allow time for the SFP cage to power up the phy */
- msec_delay(300);
-
- ret_val = hw->phy.ops.reset(hw);
- if (ret_val) {
- DEBUGOUT("Error resetting the PHY.\n");
- goto out;
- }
- }
- switch (hw->phy.type) {
- case e1000_phy_i210:
- case e1000_phy_m88:
- switch (hw->phy.id) {
- case I347AT4_E_PHY_ID:
- case M88E1112_E_PHY_ID:
- case M88E1340M_E_PHY_ID:
- case M88E1543_E_PHY_ID:
- case I210_I_PHY_ID:
- ret_val = e1000_copper_link_setup_m88_gen2(hw);
- break;
- default:
- ret_val = e1000_copper_link_setup_m88(hw);
- break;
- }
- break;
- case e1000_phy_igp_3:
- ret_val = e1000_copper_link_setup_igp(hw);
- break;
- case e1000_phy_82580:
- ret_val = e1000_copper_link_setup_82577(hw);
- break;
- default:
- ret_val = -E1000_ERR_PHY;
- break;
- }
-
- if (ret_val)
- goto out;
-
- ret_val = e1000_setup_copper_link_generic(hw);
-out:
- return ret_val;
-}
-
-/**
- * e1000_setup_serdes_link_82575 - Setup link for serdes
- * @hw: pointer to the HW structure
- *
- * Configure the physical coding sub-layer (PCS) link. The PCS link is
- * used on copper connections where the serialized gigabit media independent
- * interface (sgmii), or serdes fiber is being used. Configures the link
- * for auto-negotiation or forces speed/duplex.
- **/
-static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw)
-{
- u32 ctrl_ext, ctrl_reg, reg, anadv_reg;
- bool pcs_autoneg;
- s32 ret_val = E1000_SUCCESS;
- u16 data;
-
- DEBUGFUNC("e1000_setup_serdes_link_82575");
-
- if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
- !e1000_sgmii_active_82575(hw))
- return ret_val;
-
- /*
- * On the 82575, SerDes loopback mode persists until it is
- * explicitly turned off or a power cycle is performed. A read to
- * the register does not indicate its status. Therefore, we ensure
- * loopback mode is disabled during initialization.
- */
- E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
-
- /* power on the sfp cage if present */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
-
- ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
- ctrl_reg |= E1000_CTRL_SLU;
-
- /* set both sw defined pins on 82575/82576*/
- if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576)
- ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1;
-
- reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
-
- /* default pcs_autoneg to the same setting as mac autoneg */
- pcs_autoneg = hw->mac.autoneg;
-
- switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) {
- case E1000_CTRL_EXT_LINK_MODE_SGMII:
- /* sgmii mode lets the phy handle forcing speed/duplex */
- pcs_autoneg = true;
- /* autoneg time out should be disabled for SGMII mode */
- reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT);
- break;
- case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
- /* disable PCS autoneg and support parallel detect only */
- pcs_autoneg = false;
- /* fall through to default case */
- default:
- if (hw->mac.type == e1000_82575 ||
- hw->mac.type == e1000_82576) {
- ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT)
- pcs_autoneg = false;
- }
-
- /*
-		 * non-SGMII modes only support a speed of 1000/Full for the
-		 * link, so it is best to just force the MAC and let the pcs
- * link either autoneg or be forced to 1000/Full
- */
- ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD |
- E1000_CTRL_FD | E1000_CTRL_FRCDPX;
-
- /* set speed of 1000/Full if speed/duplex is forced */
- reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL;
- break;
- }
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
-
- /*
- * New SerDes mode allows for forcing speed or autonegotiating speed
-	 * at 1gb. Autoneg should be the default set by most drivers. This is the
- * mode that will be compatible with older link partners and switches.
- * However, both are supported by the hardware and some drivers/tools.
- */
- reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP |
- E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK);
-
- if (pcs_autoneg) {
- /* Set PCS register for autoneg */
- reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */
- E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */
-
- /* Disable force flow control for autoneg */
- reg &= ~E1000_PCS_LCTL_FORCE_FCTRL;
-
- /* Configure flow control advertisement for autoneg */
- anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
- anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE);
-
- switch (hw->fc.requested_mode) {
- case e1000_fc_full:
- case e1000_fc_rx_pause:
- anadv_reg |= E1000_TXCW_ASM_DIR;
- anadv_reg |= E1000_TXCW_PAUSE;
- break;
- case e1000_fc_tx_pause:
- anadv_reg |= E1000_TXCW_ASM_DIR;
- break;
- default:
- break;
- }
-
- E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg);
-
- DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg);
- } else {
- /* Set PCS register for forced link */
- reg |= E1000_PCS_LCTL_FSD; /* Force Speed */
-
- /* Force flow control for forced link */
- reg |= E1000_PCS_LCTL_FORCE_FCTRL;
-
- DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg);
- }
-
- E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
-
- if (!pcs_autoneg && !e1000_sgmii_active_82575(hw))
- e1000_force_mac_fc_generic(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_get_media_type_82575 - derives current media type.
- * @hw: pointer to the HW structure
- *
- * The media type is chosen based on several settings.
- * The following are taken into account:
- * - link mode set in the current port Init Control Word #3
- * - current link mode settings in CSR register
- * - MDIO vs. I2C PHY control interface chosen
- * - SFP module media type
- **/
-static s32 e1000_get_media_type_82575(struct e1000_hw *hw)
-{
- struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
- s32 ret_val = E1000_SUCCESS;
- u32 ctrl_ext = 0;
- u32 link_mode = 0;
-
- /* Set internal phy as default */
- dev_spec->sgmii_active = false;
- dev_spec->module_plugged = false;
-
- /* Get CSR setting */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
-
- /* extract link mode setting */
- link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK;
-
- switch (link_mode) {
- case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
- hw->phy.media_type = e1000_media_type_internal_serdes;
- break;
- case E1000_CTRL_EXT_LINK_MODE_GMII:
- hw->phy.media_type = e1000_media_type_copper;
- break;
- case E1000_CTRL_EXT_LINK_MODE_SGMII:
- /* Get phy control interface type set (MDIO vs. I2C)*/
- if (e1000_sgmii_uses_mdio_82575(hw)) {
- hw->phy.media_type = e1000_media_type_copper;
- dev_spec->sgmii_active = true;
- break;
- }
- /* fall through for I2C based SGMII */
- case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
- /* read media type from SFP EEPROM */
- ret_val = e1000_set_sfp_media_type_82575(hw);
- if ((ret_val != E1000_SUCCESS) ||
- (hw->phy.media_type == e1000_media_type_unknown)) {
- /*
- * If media type was not identified then return media
- * type defined by the CTRL_EXT settings.
- */
- hw->phy.media_type = e1000_media_type_internal_serdes;
-
- if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) {
- hw->phy.media_type = e1000_media_type_copper;
- dev_spec->sgmii_active = true;
- }
-
- break;
- }
-
- /* do not change link mode for 100BaseFX */
- if (dev_spec->eth_flags.e100_base_fx)
- break;
-
- /* change current link mode setting */
- ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
-
- if (hw->phy.media_type == e1000_media_type_copper)
- ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII;
- else
- ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
-
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
-
- break;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_set_sfp_media_type_82575 - derives SFP module media type.
- * @hw: pointer to the HW structure
- *
- * The media type is chosen based on the SFP module
- * compatibility flags retrieved from the SFP ID EEPROM.
- **/
-static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_ERR_CONFIG;
- u32 ctrl_ext = 0;
- struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
- struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags;
- u8 transceiver_type = 0;
- s32 timeout = 3;
-
- /* Turn I2C interface ON and power on sfp cage */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA);
-
- E1000_WRITE_FLUSH(hw);
-
- /* Read SFP module data */
- while (timeout) {
- ret_val = e1000_read_sfp_data_byte(hw,
- E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET),
- &transceiver_type);
- if (ret_val == E1000_SUCCESS)
- break;
- msec_delay(100);
- timeout--;
- }
- if (ret_val != E1000_SUCCESS)
- goto out;
-
- ret_val = e1000_read_sfp_data_byte(hw,
- E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET),
- (u8 *)eth_flags);
- if (ret_val != E1000_SUCCESS)
- goto out;
-
- /* Check if there is some SFP module plugged and powered */
- if ((transceiver_type == E1000_SFF_IDENTIFIER_SFP) ||
- (transceiver_type == E1000_SFF_IDENTIFIER_SFF)) {
- dev_spec->module_plugged = true;
- if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) {
- hw->phy.media_type = e1000_media_type_internal_serdes;
- } else if (eth_flags->e100_base_fx) {
- dev_spec->sgmii_active = true;
- hw->phy.media_type = e1000_media_type_internal_serdes;
- } else if (eth_flags->e1000_base_t) {
- dev_spec->sgmii_active = true;
- hw->phy.media_type = e1000_media_type_copper;
- } else {
- hw->phy.media_type = e1000_media_type_unknown;
- DEBUGOUT("PHY module has not been recognized\n");
- goto out;
- }
- } else {
- hw->phy.media_type = e1000_media_type_unknown;
- }
- ret_val = E1000_SUCCESS;
-out:
- /* Restore I2C interface setting */
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
- return ret_val;
-}
-
-/**
- * e1000_valid_led_default_82575 - Verify a valid default LED config
- * @hw: pointer to the HW structure
- * @data: pointer to the NVM (EEPROM)
- *
- * Read the EEPROM for the current default LED configuration. If the
- * LED configuration is not valid, set to a valid LED configuration.
- **/
-static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_valid_led_default_82575");
-
- ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- goto out;
- }
-
- if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
- switch (hw->phy.media_type) {
- case e1000_media_type_internal_serdes:
- *data = ID_LED_DEFAULT_82575_SERDES;
- break;
- case e1000_media_type_copper:
- default:
- *data = ID_LED_DEFAULT;
- break;
- }
- }
-out:
- return ret_val;
-}
-
-/**
- * e1000_sgmii_active_82575 - Return sgmii state
- * @hw: pointer to the HW structure
- *
- * 82575 silicon has a serialized gigabit media independent interface (SGMII)
- * which can be enabled for use in embedded applications. Simply
- * return the current state of the SGMII interface.
- **/
-static bool e1000_sgmii_active_82575(struct e1000_hw *hw)
-{
- struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
- return dev_spec->sgmii_active;
-}
-
-/**
- * e1000_reset_init_script_82575 - Inits HW defaults after reset
- * @hw: pointer to the HW structure
- *
- * Inits recommended HW defaults after a reset when there is no EEPROM
- * detected. This is only for the 82575.
- **/
-static s32 e1000_reset_init_script_82575(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_reset_init_script_82575");
-
- if (hw->mac.type == e1000_82575) {
- DEBUGOUT("Running reset init script for 82575\n");
- /* SerDes configuration via SERDESCTRL */
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15);
-
- /* CCM configuration via CCMCTL register */
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00);
-
- /* PCIe lanes configuration */
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81);
-
- /* PCIe PLL Configuration */
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00);
- e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_mac_addr_82575 - Read device MAC address
- * @hw: pointer to the HW structure
- **/
-static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_read_mac_addr_82575");
-
- /*
- * If there's an alternate MAC address, place it in RAR0
- * so that it will override the default permanent address
- * installed in silicon.
- */
- ret_val = e1000_check_alt_mac_addr_generic(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_read_mac_addr_generic(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_config_collision_dist_82575 - Configure collision distance
- * @hw: pointer to the HW structure
- *
- * Configures the collision distance to the default value and is used
- * during link setup.
- **/
-static void e1000_config_collision_dist_82575(struct e1000_hw *hw)
-{
- u32 tctl_ext;
-
- DEBUGFUNC("e1000_config_collision_dist_82575");
-
- tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT);
-
- tctl_ext &= ~E1000_TCTL_EXT_COLD;
- tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT;
-
- E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext);
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_power_down_phy_copper_82575 - Remove link during PHY power down
- * @hw: pointer to the HW structure
- *
- * Remove the link in the case of a PHY power down to save power, a link
- * shutdown during driver unload, or when Wake on LAN is not enabled.
- **/
-static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
-
- if (!(phy->ops.check_reset_block))
- return;
-
- /* If the management interface is not enabled, then power down */
- if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw)))
- e1000_power_down_phy_copper(hw);
-
- return;
-}
-
-/**
- * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters
- * @hw: pointer to the HW structure
- *
- * Clears the hardware counters by reading the counter registers.
- **/
-static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_clear_hw_cntrs_82575");
-
- e1000_clear_hw_cntrs_base_generic(hw);
-
- E1000_READ_REG(hw, E1000_PRC64);
- E1000_READ_REG(hw, E1000_PRC127);
- E1000_READ_REG(hw, E1000_PRC255);
- E1000_READ_REG(hw, E1000_PRC511);
- E1000_READ_REG(hw, E1000_PRC1023);
- E1000_READ_REG(hw, E1000_PRC1522);
- E1000_READ_REG(hw, E1000_PTC64);
- E1000_READ_REG(hw, E1000_PTC127);
- E1000_READ_REG(hw, E1000_PTC255);
- E1000_READ_REG(hw, E1000_PTC511);
- E1000_READ_REG(hw, E1000_PTC1023);
- E1000_READ_REG(hw, E1000_PTC1522);
-
- E1000_READ_REG(hw, E1000_ALGNERRC);
- E1000_READ_REG(hw, E1000_RXERRC);
- E1000_READ_REG(hw, E1000_TNCRS);
- E1000_READ_REG(hw, E1000_CEXTERR);
- E1000_READ_REG(hw, E1000_TSCTC);
- E1000_READ_REG(hw, E1000_TSCTFC);
-
- E1000_READ_REG(hw, E1000_MGTPRC);
- E1000_READ_REG(hw, E1000_MGTPDC);
- E1000_READ_REG(hw, E1000_MGTPTC);
-
- E1000_READ_REG(hw, E1000_IAC);
- E1000_READ_REG(hw, E1000_ICRXOC);
-
- E1000_READ_REG(hw, E1000_ICRXPTC);
- E1000_READ_REG(hw, E1000_ICRXATC);
- E1000_READ_REG(hw, E1000_ICTXPTC);
- E1000_READ_REG(hw, E1000_ICTXATC);
- E1000_READ_REG(hw, E1000_ICTXQEC);
- E1000_READ_REG(hw, E1000_ICTXQMTC);
- E1000_READ_REG(hw, E1000_ICRXDMTC);
-
- E1000_READ_REG(hw, E1000_CBTMPC);
- E1000_READ_REG(hw, E1000_HTDPMC);
- E1000_READ_REG(hw, E1000_CBRMPC);
- E1000_READ_REG(hw, E1000_RPTHC);
- E1000_READ_REG(hw, E1000_HGPTC);
- E1000_READ_REG(hw, E1000_HTCBDPC);
- E1000_READ_REG(hw, E1000_HGORCL);
- E1000_READ_REG(hw, E1000_HGORCH);
- E1000_READ_REG(hw, E1000_HGOTCL);
- E1000_READ_REG(hw, E1000_HGOTCH);
- E1000_READ_REG(hw, E1000_LENERRS);
-
- /* This register should not be read in copper configurations */
- if ((hw->phy.media_type == e1000_media_type_internal_serdes) ||
- e1000_sgmii_active_82575(hw))
- E1000_READ_REG(hw, E1000_SCVPC);
-}
-
-/**
- * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable
- * @hw: pointer to the HW structure
- *
- * After Rx enable, if manageability is enabled, there is likely some
- * bad data at the start of the FIFO and possibly in the DMA FIFO. This
- * function clears the FIFOs and flushes any packets that came in while Rx
- * was being enabled.
- **/
-void e1000_rx_fifo_flush_82575(struct e1000_hw *hw)
-{
- u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
- int i, ms_wait;
-
- DEBUGFUNC("e1000_rx_fifo_workaround_82575");
- if (hw->mac.type != e1000_82575 ||
- !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN))
- return;
-
- /* Disable all Rx queues */
- for (i = 0; i < 4; i++) {
- rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
- E1000_WRITE_REG(hw, E1000_RXDCTL(i),
- rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE);
- }
- /* Poll all queues to verify they have shut down */
- for (ms_wait = 0; ms_wait < 10; ms_wait++) {
- msec_delay(1);
- rx_enabled = 0;
- for (i = 0; i < 4; i++)
- rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i));
- if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE))
- break;
- }
-
- if (ms_wait == 10)
- DEBUGOUT("Queue disable timed out after 10ms\n");
-
- /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
- * incoming packets are rejected. Set enable and wait 2ms so that
- * any packet that was arriving while RCTL.EN was set is flushed
- */
- rfctl = E1000_READ_REG(hw, E1000_RFCTL);
- E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
-
- rlpml = E1000_READ_REG(hw, E1000_RLPML);
- E1000_WRITE_REG(hw, E1000_RLPML, 0);
-
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP);
- temp_rctl |= E1000_RCTL_LPE;
-
- E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl);
- E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN);
- E1000_WRITE_FLUSH(hw);
- msec_delay(2);
-
- /* Enable Rx queues that were previously enabled and restore our
- * previous state
- */
- for (i = 0; i < 4; i++)
- E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]);
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
- E1000_WRITE_FLUSH(hw);
-
- E1000_WRITE_REG(hw, E1000_RLPML, rlpml);
- E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
-
- /* Flush receive errors generated by workaround */
- E1000_READ_REG(hw, E1000_ROC);
- E1000_READ_REG(hw, E1000_RNBC);
- E1000_READ_REG(hw, E1000_MPC);
-}
-
-/**
- * e1000_set_pcie_completion_timeout - set pci-e completion timeout
- * @hw: pointer to the HW structure
- *
- * The defaults for 82575 and 82576 should be in the range of 50us to 50ms;
- * however, the hardware default for these parts is 500us to 1ms, which is less
- * than the 10ms recommended by the PCIe spec. To address this we need to
- * increase the value to either 10ms to 200ms for a capability version 1 config,
- * or 16ms to 55ms for version 2.
- **/
-static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw)
-{
- u32 gcr = E1000_READ_REG(hw, E1000_GCR);
- s32 ret_val = E1000_SUCCESS;
- u16 pcie_devctl2;
-
- /* only take action if timeout value is defaulted to 0 */
- if (gcr & E1000_GCR_CMPL_TMOUT_MASK)
- goto out;
-
- /*
- * if the capabilities version is type 1 we can write the
- * timeout of 10ms to 200ms through the GCR register
- */
- if (!(gcr & E1000_GCR_CAP_VER2)) {
- gcr |= E1000_GCR_CMPL_TMOUT_10ms;
- goto out;
- }
-
- /*
- * for version 2 capabilities we need to write the config space
- * directly in order to set the completion timeout value for
- * 16ms to 55ms
- */
- ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
- &pcie_devctl2);
- if (ret_val)
- goto out;
-
- pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
-
- ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
- &pcie_devctl2);
-out:
- /* disable completion timeout resend */
- gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
-
- E1000_WRITE_REG(hw, E1000_GCR, gcr);
- return ret_val;
-}
-
-/**
- * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing
- * @hw: pointer to the hardware struct
- * @enable: state to enter, either enabled or disabled
- * @pf: Physical Function pool - do not set anti-spoofing for the PF
- *
- * enables/disables L2 switch anti-spoofing functionality.
- **/
-void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf)
-{
- u32 reg_val, reg_offset;
-
- switch (hw->mac.type) {
- case e1000_82576:
- reg_offset = E1000_DTXSWC;
- break;
- case e1000_i350:
- case e1000_i354:
- reg_offset = E1000_TXSWC;
- break;
- default:
- return;
- }
-
- reg_val = E1000_READ_REG(hw, reg_offset);
- if (enable) {
- reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK |
- E1000_DTXSWC_VLAN_SPOOF_MASK);
- /* The PF can spoof - it has to in order to
- * support emulation mode NICs
- */
- reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS));
- } else {
- reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK |
- E1000_DTXSWC_VLAN_SPOOF_MASK);
- }
- E1000_WRITE_REG(hw, reg_offset, reg_val);
-}
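The XOR above is the subtle part: the spoof masks just set the bit for every pool, and toggling the PF's MAC and VLAN bits clears them again so the PF itself is never anti-spoofed. A minimal worked sketch of that arithmetic, assuming an 8-pools-per-mask layout (MAX_NUM_VFS = 8), mask values of 0x00FF/0xFF00, and pf = 0 purely for illustration:

	/* Illustration only: assumes MAX_NUM_VFS == 8, pf == 0 and the mask
	 * values below; the real constants live in the driver headers. */
	u32 reg_val = 0x0000FFFF;                /* MAC + VLAN spoof masks OR'ed in */
	reg_val ^= (1u << 0) | (1u << (0 + 8));  /* toggle the PF's two bits */
	/* reg_val == 0x0000FEFE: anti-spoofing on for pools 1-7, off for the PF. */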
-
-/**
- * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback
- * @hw: pointer to the hardware struct
- * @enable: state to enter, either enabled or disabled
- *
- * enables/disables L2 switch loopback functionality.
- **/
-void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
-{
- u32 dtxswc;
-
- switch (hw->mac.type) {
- case e1000_82576:
- dtxswc = E1000_READ_REG(hw, E1000_DTXSWC);
- if (enable)
- dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
- else
- dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
- E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc);
- break;
- case e1000_i350:
- case e1000_i354:
- dtxswc = E1000_READ_REG(hw, E1000_TXSWC);
- if (enable)
- dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
- else
- dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
- E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc);
- break;
- default:
- /* Currently no other hardware supports loopback */
- break;
- }
-}
-
-/**
- * e1000_vmdq_set_replication_pf - enable or disable vmdq replication
- * @hw: pointer to the hardware struct
- * @enable: state to enter, either enabled or disabled
- *
- * enables/disables replication of packets across multiple pools.
- **/
-void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
-{
- u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
-
- if (enable)
- vt_ctl |= E1000_VT_CTL_VM_REPL_EN;
- else
- vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN;
-
- E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
-}
-
-/**
- * e1000_read_phy_reg_82580 - Read 82580 MDI control register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the MDI control register in the PHY at offset and stores the
- * information read to data.
- **/
-static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_read_phy_reg_82580");
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
-
- hw->phy.ops.release(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_write_phy_reg_82580 - Write 82580 MDI control register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write to register at offset
- *
- * Writes data to MDI control register in the PHY at offset.
- **/
-static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_write_phy_reg_82580");
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
-
- hw->phy.ops.release(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits
- * @hw: pointer to the HW structure
- *
- * This resets the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
- * the values found in the EEPROM. This addresses an issue in which these
- * bits are not restored from EEPROM after reset.
- **/
-static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u32 mdicnfg;
- u16 nvm_data = 0;
-
- DEBUGFUNC("e1000_reset_mdicnfg_82580");
-
- if (hw->mac.type != e1000_82580)
- goto out;
- if (!e1000_sgmii_active_82575(hw))
- goto out;
-
- ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
- NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
- &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- goto out;
- }
-
- mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG);
- if (nvm_data & NVM_WORD24_EXT_MDIO)
- mdicnfg |= E1000_MDICNFG_EXT_MDIO;
- if (nvm_data & NVM_WORD24_COM_MDIO)
- mdicnfg |= E1000_MDICNFG_COM_MDIO;
- E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg);
-out:
- return ret_val;
-}
-
-/**
- * e1000_reset_hw_82580 - Reset hardware
- * @hw: pointer to the HW structure
- *
- * This resets the function or the entire device (all ports, etc.)
- * to a known state.
- **/
-static s32 e1000_reset_hw_82580(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- /* BH SW mailbox bit in SW_FW_SYNC */
- u16 swmbsw_mask = E1000_SW_SYNCH_MB;
- u32 ctrl;
- bool global_device_reset = hw->dev_spec._82575.global_device_reset;
-
- DEBUGFUNC("e1000_reset_hw_82580");
-
- hw->dev_spec._82575.global_device_reset = false;
-
- /* 82580 does not reliably do global_device_reset due to hw errata */
- if (hw->mac.type == e1000_82580)
- global_device_reset = false;
-
- /* Get current control state. */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
-
- /*
- * Prevent the PCI-E bus from sticking if there is no TLP connection
- * on the last TLP read/write transaction when MAC is reset.
- */
- ret_val = e1000_disable_pcie_master_generic(hw);
- if (ret_val)
- DEBUGOUT("PCI-E Master disable polling has failed.\n");
-
- DEBUGOUT("Masking off all interrupts\n");
- E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
- E1000_WRITE_REG(hw, E1000_RCTL, 0);
- E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
- E1000_WRITE_FLUSH(hw);
-
- msec_delay(10);
-
- /* Determine whether or not a global dev reset is requested */
- if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw,
- swmbsw_mask))
- global_device_reset = false;
-
- if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) &
- E1000_STAT_DEV_RST_SET))
- ctrl |= E1000_CTRL_DEV_RST;
- else
- ctrl |= E1000_CTRL_RST;
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- E1000_WRITE_FLUSH(hw);
-
- /* Add delay to ensure DEV_RST has time to complete */
- if (global_device_reset)
- msec_delay(5);
-
- ret_val = e1000_get_auto_rd_done_generic(hw);
- if (ret_val) {
- /*
- * When the auto config read does not complete, do not
- * return with an error. This can happen when there is
- * no EEPROM, and an error here would prevent getting link.
- */
- DEBUGOUT("Auto Read Done did not complete\n");
- }
-
- /* clear global device reset status bit */
- E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET);
-
- /* Clear any pending interrupt events. */
- E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
- E1000_READ_REG(hw, E1000_ICR);
-
- ret_val = e1000_reset_mdicnfg_82580(hw);
- if (ret_val)
- DEBUGOUT("Could not reset MDICNFG based on EEPROM\n");
-
- /* Install any alternate MAC address into RAR0 */
- ret_val = e1000_check_alt_mac_addr_generic(hw);
-
- /* Release semaphore */
- if (global_device_reset)
- hw->mac.ops.release_swfw_sync(hw, swmbsw_mask);
-
- return ret_val;
-}
-
-/**
- * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size
- * @data: data received by reading RXPBS register
- *
- * The 82580 uses a table-based approach for packet buffer allocation sizes.
- * This function converts the retrieved value into the correct table value:
- *         0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7
- *   0x0    36  72 144   1   2   4   8  16
- *   0x8    35  70 140 rsv rsv rsv rsv rsv
- */
-u16 e1000_rxpbs_adjust_82580(u32 data)
-{
- u16 ret_val = 0;
-
- if (data < E1000_82580_RXPBS_TABLE_SIZE)
- ret_val = e1000_82580_rxpbs_table[data];
-
- return ret_val;
-}
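For illustration, the lookup is a plain array index into a table mirroring the mapping documented above; the values here are copied from that comment, not from the driver's own e1000_82580_rxpbs_table definition, which lives elsewhere in this file set:

	/* Sketch only: documented encoding -> packet buffer size mapping. */
	static const u16 rxpbs_kb[] = {
		36, 72, 144, 1, 2, 4, 8, 16,    /* encodings 0x0 - 0x7 */
		35, 70, 140                     /* encodings 0x8 - 0xA */
	};
	u16 pba = (data < sizeof(rxpbs_kb) / sizeof(rxpbs_kb[0])) ?
		  rxpbs_kb[data] : 0;           /* e.g. data == 0x2 -> 144 */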
-
-/**
- * e1000_validate_nvm_checksum_with_offset - Validate EEPROM
- * checksum
- * @hw: pointer to the HW structure
- * @offset: offset in words of the checksum protected region
- *
- * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
- * and then verifies that the sum of the EEPROM is equal to 0xBABA.
- **/
-s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 checksum = 0;
- u16 i, nvm_data;
-
- DEBUGFUNC("e1000_validate_nvm_checksum_with_offset");
-
- for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) {
- ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- goto out;
- }
- checksum += nvm_data;
- }
-
- if (checksum != (u16) NVM_SUM) {
- DEBUGOUT("NVM Checksum Invalid\n");
- ret_val = -E1000_ERR_NVM;
- goto out;
- }
-
-out:
- return ret_val;
-}
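As a worked illustration of the convention (not driver code): every word in the protected region, including the checksum word itself, must sum to NVM_SUM (0xBABA) modulo 2^16.

	/* Standalone illustration of the 0xBABA checksum convention. */
	u16 words[3] = { 0x1234, 0x00AB, 0 };          /* last word is the checksum */
	words[2] = (u16)(0xBABA - (0x1234 + 0x00AB));  /* 0xBABA - 0x12DF = 0xA7DB */
	/* Validation re-adds everything: 0x1234 + 0x00AB + 0xA7DB == 0xBABA. */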
-
-/**
- * e1000_update_nvm_checksum_with_offset - Update EEPROM
- * checksum
- * @hw: pointer to the HW structure
- * @offset: offset in words of the checksum protected region
- *
- * Updates the EEPROM checksum by reading/adding each word of the EEPROM
- * up to the checksum. Then calculates the EEPROM checksum and writes the
- * value to the EEPROM.
- **/
-s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
-{
- s32 ret_val;
- u16 checksum = 0;
- u16 i, nvm_data;
-
- DEBUGFUNC("e1000_update_nvm_checksum_with_offset");
-
- for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) {
- ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error while updating checksum.\n");
- goto out;
- }
- checksum += nvm_data;
- }
- checksum = (u16) NVM_SUM - checksum;
- ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1,
- &checksum);
- if (ret_val)
- DEBUGOUT("NVM Write Error while updating checksum.\n");
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Calculates the EEPROM section checksum by reading/adding each word of
- * the EEPROM and then verifies that the sum of the EEPROM is
- * equal to 0xBABA.
- **/
-static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 eeprom_regions_count = 1;
- u16 j, nvm_data;
- u16 nvm_offset;
-
- DEBUGFUNC("e1000_validate_nvm_checksum_82580");
-
- ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- goto out;
- }
-
- if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) {
- /* if the checksums compatibility bit is set, validate checksums
- * for all 4 ports. */
- eeprom_regions_count = 4;
- }
-
- for (j = 0; j < eeprom_regions_count; j++) {
- nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
- ret_val = e1000_validate_nvm_checksum_with_offset(hw,
- nvm_offset);
- if (ret_val != E1000_SUCCESS)
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_update_nvm_checksum_82580 - Update EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Updates the EEPROM section checksums for all 4 ports by reading/adding
- * each word of the EEPROM up to the checksum. Then calculates the EEPROM
- * checksum and writes the value to the EEPROM.
- **/
-static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 j, nvm_data;
- u16 nvm_offset;
-
- DEBUGFUNC("e1000_update_nvm_checksum_82580");
-
- ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n");
- goto out;
- }
-
- if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) {
- /* set compatibility bit to validate checksums appropriately */
- nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK;
- ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1,
- &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n");
- goto out;
- }
- }
-
- for (j = 0; j < 4; j++) {
- nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
- ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
- if (ret_val)
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Calculates the EEPROM section checksum by reading/adding each word of
- * the EEPROM and then verifies that the sum of the EEPROM is
- * equal to 0xBABA.
- **/
-static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 j;
- u16 nvm_offset;
-
- DEBUGFUNC("e1000_validate_nvm_checksum_i350");
-
- for (j = 0; j < 4; j++) {
- nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
- ret_val = e1000_validate_nvm_checksum_with_offset(hw,
- nvm_offset);
- if (ret_val != E1000_SUCCESS)
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_update_nvm_checksum_i350 - Update EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Updates the EEPROM section checksums for all 4 ports by reading/adding
- * each word of the EEPROM up to the checksum. Then calculates the EEPROM
- * checksum and writes the value to the EEPROM.
- **/
-static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 j;
- u16 nvm_offset;
-
- DEBUGFUNC("e1000_update_nvm_checksum_i350");
-
- for (j = 0; j < 4; j++) {
- nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
- ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
- if (ret_val != E1000_SUCCESS)
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * __e1000_access_emi_reg - Read/write EMI register
- * @hw: pointer to the HW structure
- * @address: EMI address to program
- * @data: pointer to value to read/write from/to the EMI address
- * @read: boolean flag to indicate read or write
- **/
-static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address,
- u16 *data, bool read)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("__e1000_access_emi_reg");
-
- ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address);
- if (ret_val)
- return ret_val;
-
- if (read)
- ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data);
- else
- ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data);
-
- return ret_val;
-}
-
-/**
- * e1000_read_emi_reg - Read Extended Management Interface register
- * @hw: pointer to the HW structure
- * @addr: EMI address to program
- * @data: value to be read from the EMI address
- **/
-s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data)
-{
- DEBUGFUNC("e1000_read_emi_reg");
-
- return __e1000_access_emi_reg(hw, addr, data, true);
-}
-
-/**
- * e1000_set_eee_i350 - Enable/disable EEE support
- * @hw: pointer to the HW structure
- *
- * Enable/disable EEE based on setting in dev_spec structure.
- *
- **/
-s32 e1000_set_eee_i350(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u32 ipcnfg, eeer;
-
- DEBUGFUNC("e1000_set_eee_i350");
-
- if ((hw->mac.type < e1000_i350) ||
- (hw->phy.media_type != e1000_media_type_copper))
- goto out;
- ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG);
- eeer = E1000_READ_REG(hw, E1000_EEER);
-
- /* enable or disable per user setting */
- if (!(hw->dev_spec._82575.eee_disable)) {
- u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU);
-
- ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
- eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
- E1000_EEER_LPI_FC);
-
- /* This bit should not be set in normal operation. */
- if (eee_su & E1000_EEE_SU_LPI_CLK_STP)
- DEBUGOUT("LPI Clock Stop Bit should not be set!\n");
- } else {
- ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
- eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
- E1000_EEER_LPI_FC);
- }
- E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg);
- E1000_WRITE_REG(hw, E1000_EEER, eeer);
- E1000_READ_REG(hw, E1000_IPCNFG);
- E1000_READ_REG(hw, E1000_EEER);
-out:
-
- return ret_val;
-}
-
-/**
- * e1000_set_eee_i354 - Enable/disable EEE support
- * @hw: pointer to the HW structure
- *
- * Enable/disable EEE legacy mode based on setting in dev_spec structure.
- *
- **/
-s32 e1000_set_eee_i354(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u16 phy_data;
-
- DEBUGFUNC("e1000_set_eee_i354");
-
- if ((hw->phy.media_type != e1000_media_type_copper) ||
- ((phy->id != M88E1543_E_PHY_ID)))
- goto out;
-
- if (!hw->dev_spec._82575.eee_disable) {
- /* Switch to PHY page 18. */
- ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18);
- if (ret_val)
- goto out;
-
- ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1,
- &phy_data);
- if (ret_val)
- goto out;
-
- phy_data |= E1000_M88E1543_EEE_CTRL_1_MS;
- ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1,
- phy_data);
- if (ret_val)
- goto out;
-
- /* Return the PHY to page 0. */
- ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
- if (ret_val)
- goto out;
-
- /* Turn on EEE advertisement. */
- ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
- E1000_EEE_ADV_DEV_I354,
- &phy_data);
- if (ret_val)
- goto out;
-
- phy_data |= E1000_EEE_ADV_100_SUPPORTED |
- E1000_EEE_ADV_1000_SUPPORTED;
- ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
- E1000_EEE_ADV_DEV_I354,
- phy_data);
- } else {
- /* Turn off EEE advertisement. */
- ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
- E1000_EEE_ADV_DEV_I354,
- &phy_data);
- if (ret_val)
- goto out;
-
- phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED |
- E1000_EEE_ADV_1000_SUPPORTED);
- ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
- E1000_EEE_ADV_DEV_I354,
- phy_data);
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_get_eee_status_i354 - Get EEE status
- * @hw: pointer to the HW structure
- * @status: EEE status
- *
- * Get EEE status by guessing based on whether Tx or Rx LPI indications have
- * been received.
- **/
-s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u16 phy_data;
-
- DEBUGFUNC("e1000_get_eee_status_i354");
-
- /* Check if EEE is supported on this device. */
- if ((hw->phy.media_type != e1000_media_type_copper) ||
- ((phy->id != M88E1543_E_PHY_ID)))
- goto out;
-
- ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354,
- E1000_PCS_STATUS_DEV_I354,
- &phy_data);
- if (ret_val)
- goto out;
-
- *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD |
- E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false;
-
-out:
- return ret_val;
-}
-
-/* Due to a hw erratum, if the host tries to configure the VFTA register
- * while performing queries from the BMC or DMA, then the VFTA in some
- * cases won't be written.
- */
-
-/**
- * e1000_clear_vfta_i350 - Clear VLAN filter table
- * @hw: pointer to the HW structure
- *
- * Clears the register array which contains the VLAN filter table by
- * setting all the values to 0.
- **/
-void e1000_clear_vfta_i350(struct e1000_hw *hw)
-{
- u32 offset;
- int i;
-
- DEBUGFUNC("e1000_clear_vfta_350");
-
- for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
- for (i = 0; i < 10; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
-
- E1000_WRITE_FLUSH(hw);
- }
-}
-
-/**
- * e1000_write_vfta_i350 - Write value to VLAN filter table
- * @hw: pointer to the HW structure
- * @offset: register offset in VLAN filter table
- * @value: register value written to VLAN filter table
- *
- * Writes value at the given offset in the register array which stores
- * the VLAN filter table.
- **/
-void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value)
-{
- int i;
-
- DEBUGFUNC("e1000_write_vfta_350");
-
- for (i = 0; i < 10; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
-
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_set_i2c_bb - Enable I2C bit-bang
- * @hw: pointer to the HW structure
- *
- * Enable I2C bit-bang interface
- *
- **/
-s32 e1000_set_i2c_bb(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u32 ctrl_ext, i2cparams;
-
- DEBUGFUNC("e1000_set_i2c_bb");
-
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- ctrl_ext |= E1000_CTRL_I2C_ENA;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
- E1000_WRITE_FLUSH(hw);
-
- i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS);
- i2cparams |= E1000_I2CBB_EN;
- i2cparams |= E1000_I2C_DATA_OE_N;
- i2cparams |= E1000_I2C_CLK_OE_N;
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams);
- E1000_WRITE_FLUSH(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @dev_addr: device address
- * @data: value read
- *
- * Performs byte read operation over I2C interface at
- * a specified device address.
- **/
-s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data)
-{
- s32 status = E1000_SUCCESS;
- u32 max_retry = 10;
- u32 retry = 1;
- u16 swfw_mask = 0;
-
- bool nack = true;
-
- DEBUGFUNC("e1000_read_i2c_byte_generic");
-
- swfw_mask = E1000_SWFW_PHY0_SM;
-
- do {
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
- != E1000_SUCCESS) {
- status = E1000_ERR_SWFW_SYNC;
- goto read_byte_out;
- }
-
- e1000_i2c_start(hw);
-
- /* Device Address and write indication */
- status = e1000_clock_out_i2c_byte(hw, dev_addr);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_clock_out_i2c_byte(hw, byte_offset);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- e1000_i2c_start(hw);
-
- /* Device Address and read indication */
- status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1));
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_clock_in_i2c_byte(hw, data);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_clock_out_i2c_bit(hw, nack);
- if (status != E1000_SUCCESS)
- goto fail;
-
- e1000_i2c_stop(hw);
- break;
-
-fail:
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
- msec_delay(100);
- e1000_i2c_bus_clear(hw);
- retry++;
- if (retry < max_retry)
- DEBUGOUT("I2C byte read error - Retrying.\n");
- else
- DEBUGOUT("I2C byte read error.\n");
-
- } while (retry < max_retry);
-
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
-read_byte_out:
-
- return status;
-}
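A minimal usage sketch of this helper (not taken from the driver): read the SFF identifier byte of a module, assuming the conventional SFF-8472 A0h device address (0xA0) and identifier offset 0. The driver's own SFP probing goes through e1000_read_sfp_data_byte() in e1000_set_sfp_media_type_82575() above.

	u8 id = 0;

	/* 0xA0 and the SFP identifier value 0x03 are SFF conventions, used
	 * here only to illustrate the call signature. */
	if (e1000_read_i2c_byte_generic(hw, 0x00, 0xA0, &id) == E1000_SUCCESS &&
	    id == 0x03)
		DEBUGOUT("SFP module detected\n");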
-
-/**
- * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @dev_addr: device address
- * @data: value to write
- *
- * Performs byte write operation over I2C interface at
- * a specified device address.
- **/
-s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data)
-{
- s32 status = E1000_SUCCESS;
- u32 max_retry = 1;
- u32 retry = 0;
- u16 swfw_mask = 0;
-
- DEBUGFUNC("e1000_write_i2c_byte_generic");
-
- swfw_mask = E1000_SWFW_PHY0_SM;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) {
- status = E1000_ERR_SWFW_SYNC;
- goto write_byte_out;
- }
-
- do {
- e1000_i2c_start(hw);
-
- status = e1000_clock_out_i2c_byte(hw, dev_addr);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_clock_out_i2c_byte(hw, byte_offset);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_clock_out_i2c_byte(hw, data);
- if (status != E1000_SUCCESS)
- goto fail;
-
- status = e1000_get_i2c_ack(hw);
- if (status != E1000_SUCCESS)
- goto fail;
-
- e1000_i2c_stop(hw);
- break;
-
-fail:
- e1000_i2c_bus_clear(hw);
- retry++;
- if (retry < max_retry)
- DEBUGOUT("I2C byte write error - Retrying.\n");
- else
- DEBUGOUT("I2C byte write error.\n");
- } while (retry < max_retry);
-
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
-write_byte_out:
-
- return status;
-}
-
-/**
- * e1000_i2c_start - Sets I2C start condition
- * @hw: pointer to hardware structure
- *
- * Sets I2C start condition (High -> Low on SDA while SCL is High)
- **/
-static void e1000_i2c_start(struct e1000_hw *hw)
-{
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- DEBUGFUNC("e1000_i2c_start");
-
- /* Start condition must begin with data and clock high */
- e1000_set_i2c_data(hw, &i2cctl, 1);
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Setup time for start condition (4.7us) */
- usec_delay(E1000_I2C_T_SU_STA);
-
- e1000_set_i2c_data(hw, &i2cctl, 0);
-
- /* Hold time for start condition (4us) */
- usec_delay(E1000_I2C_T_HD_STA);
-
- e1000_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- usec_delay(E1000_I2C_T_LOW);
-
-}
-
-/**
- * e1000_i2c_stop - Sets I2C stop condition
- * @hw: pointer to hardware structure
- *
- * Sets I2C stop condition (Low -> High on SDA while SCL is High)
- **/
-static void e1000_i2c_stop(struct e1000_hw *hw)
-{
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- DEBUGFUNC("e1000_i2c_stop");
-
- /* Stop condition must begin with data low and clock high */
- e1000_set_i2c_data(hw, &i2cctl, 0);
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Setup time for stop condition (4us) */
- usec_delay(E1000_I2C_T_SU_STO);
-
- e1000_set_i2c_data(hw, &i2cctl, 1);
-
- /* bus free time between stop and start (4.7us)*/
- usec_delay(E1000_I2C_T_BUF);
-}
-
-/**
- * e1000_clock_in_i2c_byte - Clocks in one byte via I2C
- * @hw: pointer to hardware structure
- * @data: data byte to clock in
- *
- * Clocks in one data byte via the I2C data/clock lines
- **/
-static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data)
-{
- s32 i;
- bool bit = 0;
-
- DEBUGFUNC("e1000_clock_in_i2c_byte");
-
- *data = 0;
- for (i = 7; i >= 0; i--) {
- e1000_clock_in_i2c_bit(hw, &bit);
- *data |= bit << i;
- }
-
- return E1000_SUCCESS;
-}
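For illustration, the MSB-first assembly above means the first bit clocked in lands in bit 7 of the result; a quick worked example (not driver code):

	/* Bits arriving 1,0,1,0,0,1,0,1 (MSB first) assemble to 0xA5. */
	u8 byte = 0;
	byte |= (u8)1 << 7;    /* first bit clocked in */
	byte |= (u8)1 << 5;
	byte |= (u8)1 << 2;
	byte |= (u8)1 << 0;    /* last bit clocked in */
	/* byte == 0xA5 */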
-
-/**
- * e1000_clock_out_i2c_byte - Clocks out one byte via I2C
- * @hw: pointer to hardware structure
- * @data: data byte clocked out
- *
- * Clocks out one data byte via the I2C data/clock lines
- **/
-static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data)
-{
- s32 status = E1000_SUCCESS;
- s32 i;
- u32 i2cctl;
- bool bit = 0;
-
- DEBUGFUNC("e1000_clock_out_i2c_byte");
-
- for (i = 7; i >= 0; i--) {
- bit = (data >> i) & 0x1;
- status = e1000_clock_out_i2c_bit(hw, bit);
-
- if (status != E1000_SUCCESS)
- break;
- }
-
- /* Release SDA line (set high) */
- i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- i2cctl |= E1000_I2C_DATA_OE_N;
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
- E1000_WRITE_FLUSH(hw);
-
- return status;
-}
-
-/**
- * e1000_get_i2c_ack - Polls for I2C ACK
- * @hw: pointer to hardware structure
- *
- * Polls for an ACK from the slave device after a byte has been clocked out
- **/
-static s32 e1000_get_i2c_ack(struct e1000_hw *hw)
-{
- s32 status = E1000_SUCCESS;
- u32 i = 0;
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
- u32 timeout = 10;
- bool ack = true;
-
- DEBUGFUNC("e1000_get_i2c_ack");
-
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Minimum high period of clock is 4us */
- usec_delay(E1000_I2C_T_HIGH);
-
- /* Wait until SCL returns high */
- for (i = 0; i < timeout; i++) {
- usec_delay(1);
- i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
- if (i2cctl & E1000_I2C_CLK_IN)
- break;
- }
- if (!(i2cctl & E1000_I2C_CLK_IN))
- return E1000_ERR_I2C;
-
- ack = e1000_get_i2c_data(&i2cctl);
- if (ack) {
- DEBUGOUT("I2C ack was not received.\n");
- status = E1000_ERR_I2C;
- }
-
- e1000_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- usec_delay(E1000_I2C_T_LOW);
-
- return status;
-}
-
-/**
- * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
- * @hw: pointer to hardware structure
- * @data: read data value
- *
- * Clocks in one bit via I2C data/clock
- **/
-static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data)
-{
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- DEBUGFUNC("e1000_clock_in_i2c_bit");
-
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Minimum high period of clock is 4us */
- usec_delay(E1000_I2C_T_HIGH);
-
- i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
- *data = e1000_get_i2c_data(&i2cctl);
-
- e1000_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- usec_delay(E1000_I2C_T_LOW);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_clock_out_i2c_bit - Clocks out one bit via I2C data/clock
- * @hw: pointer to hardware structure
- * @data: data value to write
- *
- * Clocks out one bit via I2C data/clock
- **/
-static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data)
-{
- s32 status;
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- DEBUGFUNC("e1000_clock_out_i2c_bit");
-
- status = e1000_set_i2c_data(hw, &i2cctl, data);
- if (status == E1000_SUCCESS) {
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Minimum high period of clock is 4us */
- usec_delay(E1000_I2C_T_HIGH);
-
- e1000_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us.
- * This also takes care of the data hold time.
- */
- usec_delay(E1000_I2C_T_LOW);
- } else {
- status = E1000_ERR_I2C;
- DEBUGOUT1("I2C data was not set to %X\n", data);
- }
-
- return status;
-}
-
-/**
- * e1000_raise_i2c_clk - Raises the I2C SCL clock
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- *
- * Raises the I2C clock line '0'->'1'
- **/
-static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
-{
- DEBUGFUNC("e1000_raise_i2c_clk");
-
- *i2cctl |= E1000_I2C_CLK_OUT;
- *i2cctl &= ~E1000_I2C_CLK_OE_N;
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
- E1000_WRITE_FLUSH(hw);
-
- /* SCL rise time (1000ns) */
- usec_delay(E1000_I2C_T_RISE);
-}
-
-/**
- * e1000_lower_i2c_clk - Lowers the I2C SCL clock
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- *
- * Lowers the I2C clock line '1'->'0'
- **/
-static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
-{
-
- DEBUGFUNC("e1000_lower_i2c_clk");
-
- *i2cctl &= ~E1000_I2C_CLK_OUT;
- *i2cctl &= ~E1000_I2C_CLK_OE_N;
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
- E1000_WRITE_FLUSH(hw);
-
- /* SCL fall time (300ns) */
- usec_delay(E1000_I2C_T_FALL);
-}
-
-/**
- * e1000_set_i2c_data - Sets the I2C data bit
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- * @data: I2C data value (0 or 1) to set
- *
- * Sets the I2C data bit
- **/
-static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data)
-{
- s32 status = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_set_i2c_data");
-
- if (data)
- *i2cctl |= E1000_I2C_DATA_OUT;
- else
- *i2cctl &= ~E1000_I2C_DATA_OUT;
-
- *i2cctl &= ~E1000_I2C_DATA_OE_N;
- *i2cctl |= E1000_I2C_CLK_OE_N;
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
- E1000_WRITE_FLUSH(hw);
-
- /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
- usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA);
-
- *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
- if (data != e1000_get_i2c_data(i2cctl)) {
- status = E1000_ERR_I2C;
- DEBUGOUT1("Error - I2C data was not set to %X.\n", data);
- }
-
- return status;
-}
-
-/**
- * e1000_get_i2c_data - Reads the I2C SDA data bit
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- *
- * Returns the I2C data bit value
- **/
-static bool e1000_get_i2c_data(u32 *i2cctl)
-{
- bool data;
-
- DEBUGFUNC("e1000_get_i2c_data");
-
- if (*i2cctl & E1000_I2C_DATA_IN)
- data = 1;
- else
- data = 0;
-
- return data;
-}
-
-/**
- * e1000_i2c_bus_clear - Clears the I2C bus
- * @hw: pointer to hardware structure
- *
- * Clears the I2C bus by sending nine clock pulses.
- * Used when data line is stuck low.
- **/
-void e1000_i2c_bus_clear(struct e1000_hw *hw)
-{
- u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
- u32 i;
-
- DEBUGFUNC("e1000_i2c_bus_clear");
-
- e1000_i2c_start(hw);
-
- e1000_set_i2c_data(hw, &i2cctl, 1);
-
- for (i = 0; i < 9; i++) {
- e1000_raise_i2c_clk(hw, &i2cctl);
-
- /* Min high period of clock is 4us */
- usec_delay(E1000_I2C_T_HIGH);
-
- e1000_lower_i2c_clk(hw, &i2cctl);
-
- /* Min low period of clock is 4.7us*/
- usec_delay(E1000_I2C_T_LOW);
- }
-
- e1000_i2c_start(hw);
-
- /* Put the i2c bus back to default state */
- e1000_i2c_stop(hw);
-}
-
-static const u8 e1000_emc_temp_data[4] = {
- E1000_EMC_INTERNAL_DATA,
- E1000_EMC_DIODE1_DATA,
- E1000_EMC_DIODE2_DATA,
- E1000_EMC_DIODE3_DATA
-};
-static const u8 e1000_emc_therm_limit[4] = {
- E1000_EMC_INTERNAL_THERM_LIMIT,
- E1000_EMC_DIODE1_THERM_LIMIT,
- E1000_EMC_DIODE2_THERM_LIMIT,
- E1000_EMC_DIODE3_THERM_LIMIT
-};
-
-/**
- * e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data
- * @hw: pointer to hardware structure
- *
- * Updates the temperatures in mac.thermal_sensor_data
- **/
-s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw)
-{
- s32 status = E1000_SUCCESS;
- u16 ets_offset;
- u16 ets_cfg;
- u16 ets_sensor;
- u8 num_sensors;
- u8 sensor_index;
- u8 sensor_location;
- u8 i;
- struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
-
- DEBUGFUNC("e1000_get_thermal_sensor_data_generic");
-
- if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
- return E1000_NOT_IMPLEMENTED;
-
- data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF);
-
- /* Return the internal sensor only if ETS is unsupported */
- e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
- if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
- return status;
-
- e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
- != NVM_ETS_TYPE_EMC)
- return E1000_NOT_IMPLEMENTED;
-
- num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
- if (num_sensors > E1000_MAX_SENSORS)
- num_sensors = E1000_MAX_SENSORS;
-
- for (i = 1; i < num_sensors; i++) {
- e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
- NVM_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
- NVM_ETS_DATA_LOC_SHIFT);
-
- if (sensor_location != 0)
- hw->phy.ops.read_i2c_byte(hw,
- e1000_emc_temp_data[sensor_index],
- E1000_I2C_THERMAL_SENSOR_ADDR,
- &data->sensor[i].temp);
- }
- return status;
-}
-
-/**
- * e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds
- * @hw: pointer to hardware structure
- *
- * Sets the thermal sensor thresholds according to the NVM map
- * and saves the threshold and location values into mac.thermal_sensor_data.
- **/
-s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
-{
- s32 status = E1000_SUCCESS;
- u16 ets_offset;
- u16 ets_cfg;
- u16 ets_sensor;
- u8 low_thresh_delta;
- u8 num_sensors;
- u8 sensor_index;
- u8 sensor_location;
- u8 therm_limit;
- u8 i;
- struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
-
- DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic");
-
- if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
- return E1000_NOT_IMPLEMENTED;
-
- memset(data, 0, sizeof(struct e1000_thermal_sensor_data));
-
- data->sensor[0].location = 0x1;
- data->sensor[0].caution_thresh =
- (E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF);
- data->sensor[0].max_op_thresh =
- (E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF);
-
- /* Return the internal sensor only if ETS is unsupported */
- e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
- if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
- return status;
-
- e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
- != NVM_ETS_TYPE_EMC)
- return E1000_NOT_IMPLEMENTED;
-
- low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
- NVM_ETS_LTHRES_DELTA_SHIFT);
- num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
-
- for (i = 1; i <= num_sensors; i++) {
- e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
- NVM_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
- NVM_ETS_DATA_LOC_SHIFT);
- therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;
-
- hw->phy.ops.write_i2c_byte(hw,
- e1000_emc_therm_limit[sensor_index],
- E1000_I2C_THERMAL_SENSOR_ADDR,
- therm_limit);
-
- if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) {
- data->sensor[i].location = sensor_location;
- data->sensor[i].caution_thresh = therm_limit;
- data->sensor[i].max_op_thresh = therm_limit -
- low_thresh_delta;
- }
- }
- return status;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
deleted file mode 100644
index 2e0dbb2f..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
+++ /dev/null
@@ -1,494 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_82575_H_
-#define _E1000_82575_H_
-
-#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \
- (ID_LED_DEF1_DEF2 << 8) | \
- (ID_LED_DEF1_DEF2 << 4) | \
- (ID_LED_OFF1_ON2))
-/*
- * Receive Address Register Count
- * Number of high/low register pairs in the RAR. The RAR (Receive Address
- * Registers) holds the directed and multicast addresses that we monitor.
- * These entries are also used for MAC-based filtering.
- */
-/*
- * For 82576, there are an additional set of RARs that begin at an offset
- * separate from the first set of RARs.
- */
-#define E1000_RAR_ENTRIES_82575 16
-#define E1000_RAR_ENTRIES_82576 24
-#define E1000_RAR_ENTRIES_82580 24
-#define E1000_RAR_ENTRIES_I350 32
-#define E1000_SW_SYNCH_MB 0x00000100
-#define E1000_STAT_DEV_RST_SET 0x00100000
-#define E1000_CTRL_DEV_RST 0x20000000
-
-struct e1000_adv_data_desc {
- __le64 buffer_addr; /* Address of the descriptor's data buffer */
- union {
- u32 data;
- struct {
- u32 datalen:16; /* Data buffer length */
- u32 rsvd:4;
- u32 dtyp:4; /* Descriptor type */
- u32 dcmd:8; /* Descriptor command */
- } config;
- } lower;
- union {
- u32 data;
- struct {
- u32 status:4; /* Descriptor status */
- u32 idx:4;
- u32 popts:6; /* Packet Options */
- u32 paylen:18; /* Payload length */
- } options;
- } upper;
-};
-
-#define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */
-#define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */
-#define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */
-#define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */
-#define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */
-#define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */
-#define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */
-#define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */
-#define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */
-#define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */
-#define E1000_ADV_DCMD_RS 0x8 /* Report Status */
-#define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */
-#define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */
-/* Extended Device Control */
-#define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */
-
-struct e1000_adv_context_desc {
- union {
- u32 ip_config;
- struct {
- u32 iplen:9;
- u32 maclen:7;
- u32 vlan_tag:16;
- } fields;
- } ip_setup;
- u32 seq_num;
- union {
- u64 l4_config;
- struct {
- u32 mkrloc:9;
- u32 tucmd:11;
- u32 dtyp:4;
- u32 adv:8;
- u32 rsvd:4;
- u32 idx:4;
- u32 l4len:8;
- u32 mss:16;
- } fields;
- } l4_setup;
-};
-
-/* SRRCTL bit definitions */
-#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */
-#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00
-#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
-#define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000
-#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
-#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
-#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
-#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000
-#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
-#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000
-#define E1000_SRRCTL_TIMESTAMP 0x40000000
-#define E1000_SRRCTL_DROP_EN 0x80000000
-
-#define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F
-#define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00
-
-#define E1000_TX_HEAD_WB_ENABLE 0x1
-#define E1000_TX_SEQNUM_WB_ENABLE 0x2
-
-#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002
-#define E1000_MRQC_ENABLE_VMDQ 0x00000003
-#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005
-#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
-#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
-#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
-#define E1000_MRQC_ENABLE_RSS_8Q 0x00000002
-
-#define E1000_VMRCTL_MIRROR_PORT_SHIFT 8
-#define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \
- E1000_VMRCTL_MIRROR_PORT_SHIFT)
-#define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0)
-#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1)
-#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2)
-
-#define E1000_EICR_TX_QUEUE ( \
- E1000_EICR_TX_QUEUE0 | \
- E1000_EICR_TX_QUEUE1 | \
- E1000_EICR_TX_QUEUE2 | \
- E1000_EICR_TX_QUEUE3)
-
-#define E1000_EICR_RX_QUEUE ( \
- E1000_EICR_RX_QUEUE0 | \
- E1000_EICR_RX_QUEUE1 | \
- E1000_EICR_RX_QUEUE2 | \
- E1000_EICR_RX_QUEUE3)
-
-#define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE
-#define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE
-
-#define EIMS_ENABLE_MASK ( \
- E1000_EIMS_RX_QUEUE | \
- E1000_EIMS_TX_QUEUE | \
- E1000_EIMS_TCP_TIMER | \
- E1000_EIMS_OTHER)
-
-/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
-#define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */
-#define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */
-#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */
-#define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */
-#define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */
-#define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */
-#define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */
-#define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */
-#define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */
-#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */
-
-/* Receive Descriptor - Advanced */
-union e1000_adv_rx_desc {
- struct {
- __le64 pkt_addr; /* Packet buffer address */
- __le64 hdr_addr; /* Header buffer address */
- } read;
- struct {
- struct {
- union {
- __le32 data;
- struct {
- __le16 pkt_info; /*RSS type, Pkt type*/
- /* Split Header, header buffer len */
- __le16 hdr_info;
- } hs_rss;
- } lo_dword;
- union {
- __le32 rss; /* RSS Hash */
- struct {
- __le16 ip_id; /* IP id */
- __le16 csum; /* Packet Checksum */
- } csum_ip;
- } hi_dword;
- } lower;
- struct {
- __le32 status_error; /* ext status/error */
- __le16 length; /* Packet length */
- __le16 vlan; /* VLAN tag */
- } upper;
- } wb; /* writeback */
-};
-
-#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F
-#define E1000_RXDADV_RSSTYPE_SHIFT 12
-#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0
-#define E1000_RXDADV_HDRBUFLEN_SHIFT 5
-#define E1000_RXDADV_SPLITHEADER_EN 0x00001000
-#define E1000_RXDADV_SPH 0x8000
-#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */
-#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */
-#define E1000_RXDADV_ERR_HBO 0x00800000
-
-/* RSS Hash results */
-#define E1000_RXDADV_RSSTYPE_NONE 0x00000000
-#define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001
-#define E1000_RXDADV_RSSTYPE_IPV4 0x00000002
-#define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003
-#define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004
-#define E1000_RXDADV_RSSTYPE_IPV6 0x00000005
-#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
-#define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007
-#define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008
-#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
-
-/* RSS Packet Types as indicated in the receive descriptor */
-#define E1000_RXDADV_PKTTYPE_NONE 0x00000000
-#define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */
-#define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */
-#define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */
-#define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */
-#define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */
-#define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */
-#define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */
-#define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */
-
-#define E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */
-#define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */
-#define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */
-#define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */
-#define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
-#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
-
-/* LinkSec results */
-/* Security Processing bit Indication */
-#define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000
-#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
-#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
-#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
-#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
-
-#define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000
-#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
-#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
-#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
-#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000
-
-/* Transmit Descriptor - Advanced */
-union e1000_adv_tx_desc {
- struct {
- __le64 buffer_addr; /* Address of descriptor's data buf */
- __le32 cmd_type_len;
- __le32 olinfo_status;
- } read;
- struct {
- __le64 rsvd; /* Reserved */
- __le32 nxtseq_seed;
- __le32 status;
- } wb;
-};
-
-/* Adv Transmit Descriptor Config Masks */
-#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
-#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
-#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
-#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
-#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */
-#define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */
-#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
-#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
-#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
-#define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */
-#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */
-#define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */
-#define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */
-#define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
-#define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
-#define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
-/* 1st & Last TSO-full iSCSI PDU*/
-#define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800
-#define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
-#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
-
-/* Context descriptors */
-struct e1000_adv_tx_context_desc {
- __le32 vlan_macip_lens;
- __le32 seqnum_seed;
- __le32 type_tucmd_mlhl;
- __le32 mss_l4len_idx;
-};
-
-#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
-#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
-#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
-#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
-#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
-#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
-#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
-#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
-/* IPSec Encrypt Enable for ESP */
-#define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000
-/* Req requires Markers and CRC */
-#define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000
-#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
-#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
-/* Adv ctxt IPSec SA IDX mask */
-#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF
-/* Adv ctxt IPSec ESP len mask */
-#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF
-
-/* Additional Transmit Descriptor Control definitions */
-#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */
-#define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */
-/* Tx Queue Arbitration Priority 0=low, 1=high */
-#define E1000_TXDCTL_PRIORITY 0x08000000
-
-/* Additional Receive Descriptor Control definitions */
-#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */
-#define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */
-
-/* Direct Cache Access (DCA) definitions */
-#define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */
-#define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */
-
-#define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */
-#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */
-
-#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
-#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */
-#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */
-#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */
-#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */
-
-#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
-#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */
-#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */
-#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */
-#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */
-
-#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */
-#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */
-#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */
-#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */
-
-/* Additional interrupt register bit definitions */
-#define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */
-#define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */
-#define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */
-
-/* ETQF register bit definitions */
-#define E1000_ETQF_FILTER_ENABLE (1 << 26)
-#define E1000_ETQF_IMM_INT (1 << 29)
-#define E1000_ETQF_1588 (1 << 30)
-#define E1000_ETQF_QUEUE_ENABLE (1 << 31)
-/*
- * ETQF filter list: one static filter per filter consumer. This is
- * to avoid filter collisions later. Add new filters here!!
- *
- * Current filters:
- * EAPOL 802.1x (0x888e): Filter 0
- */
-#define E1000_ETQF_FILTER_EAPOL 0
-
-#define E1000_FTQF_VF_BP 0x00008000
-#define E1000_FTQF_1588_TIME_STAMP 0x08000000
-#define E1000_FTQF_MASK 0xF0000000
-#define E1000_FTQF_MASK_PROTO_BP 0x10000000
-#define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000
-#define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000
-#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000
-
-#define E1000_NVM_APME_82575 0x0400
-#define MAX_NUM_VFS 7
-
-#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */
-#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */
-#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */
-#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8
-#define E1000_DTXSWC_LLE_SHIFT 16
-#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */
-
-/* Easy defines for setting default pool, would normally be left at zero */
-#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7
-#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT)
-
-/* Other useful VMD_CTL register defines */
-#define E1000_VT_CTL_IGNORE_MAC (1 << 28)
-#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29)
-#define E1000_VT_CTL_VM_REPL_EN (1 << 30)
-
-/* Per VM Offload register setup */
-#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */
-#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */
-#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */
-#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */
-#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */
-#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */
-#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */
-#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */
-#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */
-#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */
-
-#define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */
-#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous enable */
-#define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */
-#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */
-#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */
-
-#define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */
-#define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */
-
-#define E1000_VLVF_ARRAY_SIZE 32
-#define E1000_VLVF_VLANID_MASK 0x00000FFF
-#define E1000_VLVF_POOLSEL_SHIFT 12
-#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT)
-#define E1000_VLVF_LVLAN 0x00100000
-#define E1000_VLVF_VLANID_ENABLE 0x80000000
-
-#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */
-#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */
-
-#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */
-
-#define E1000_IOVCTL 0x05BBC
-#define E1000_IOVCTL_REUSE_VFQ 0x00000001
-
-#define E1000_RPLOLR_STRVLAN 0x40000000
-#define E1000_RPLOLR_STRCRC 0x80000000
-
-#define E1000_TCTL_EXT_COLD 0x000FFC00
-#define E1000_TCTL_EXT_COLD_SHIFT 10
-
-#define E1000_DTXCTL_8023LL 0x0004
-#define E1000_DTXCTL_VLAN_ADDED 0x0008
-#define E1000_DTXCTL_OOS_ENABLE 0x0010
-#define E1000_DTXCTL_MDP_EN 0x0020
-#define E1000_DTXCTL_SPOOF_INT 0x0040
-
-#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14)
-
-#define ALL_QUEUES 0xFFFF
-
-/* Rx packet buffer size defines */
-#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F
-void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable);
-void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf);
-void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable);
-s32 e1000_init_nvm_params_82575(struct e1000_hw *hw);
-
-u16 e1000_rxpbs_adjust_82580(u32 data);
-s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data);
-s32 e1000_set_eee_i350(struct e1000_hw *);
-s32 e1000_set_eee_i354(struct e1000_hw *);
-s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *);
-#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8
-#define E1000_EMC_INTERNAL_DATA 0x00
-#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20
-#define E1000_EMC_DIODE1_DATA 0x01
-#define E1000_EMC_DIODE1_THERM_LIMIT 0x19
-#define E1000_EMC_DIODE2_DATA 0x23
-#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A
-#define E1000_EMC_DIODE3_DATA 0x2A
-#define E1000_EMC_DIODE3_THERM_LIMIT 0x30
-
-s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw);
-s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw);
-
-/* I2C SDA and SCL timing parameters for standard mode */
-#define E1000_I2C_T_HD_STA 4
-#define E1000_I2C_T_LOW 5
-#define E1000_I2C_T_HIGH 4
-#define E1000_I2C_T_SU_STA 5
-#define E1000_I2C_T_HD_DATA 5
-#define E1000_I2C_T_SU_DATA 1
-#define E1000_I2C_T_RISE 1
-#define E1000_I2C_T_FALL 1
-#define E1000_I2C_T_SU_STO 4
-#define E1000_I2C_T_BUF 5
-
-s32 e1000_set_i2c_bb(struct e1000_hw *hw);
-s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data);
-s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data);
-void e1000_i2c_bus_clear(struct e1000_hw *hw);
-#endif /* _E1000_82575_H_ */
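The header deleted above pairs the advanced Rx descriptor writeback layout (union e1000_adv_rx_desc) with the E1000_RXDADV_RSSTYPE_* values reported in it. The following is only an illustrative sketch, not part of the driver: a hypothetical helper that pulls the RSS type out of a completed writeback descriptor, assuming the kernel's le32_to_cpu() and u32 types are in scope.

static inline u32 example_rxd_rss_type(const union e1000_adv_rx_desc *rxd)
{
        /* In the writeback format the RSS type occupies the low four bits
         * of the lower dword, so masking with E1000_RXDADV_RSSTYPE_MASK
         * yields one of the E1000_RXDADV_RSSTYPE_* values above.
         */
        return le32_to_cpu(rxd->wb.lower.lo_dword.data) &
               E1000_RXDADV_RSSTYPE_MASK;
}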
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
deleted file mode 100644
index 3e54e50e..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
+++ /dev/null
@@ -1,1144 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-/**
- * e1000_init_mac_params - Initialize MAC function pointers
- * @hw: pointer to the HW structure
- *
- * This function initializes the function pointers for the MAC
- * set of functions. Called by drivers or by e1000_setup_init_funcs.
- **/
-s32 e1000_init_mac_params(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- if (hw->mac.ops.init_params) {
- ret_val = hw->mac.ops.init_params(hw);
- if (ret_val) {
- DEBUGOUT("MAC Initialization Error\n");
- goto out;
- }
- } else {
- DEBUGOUT("mac.init_mac_params was NULL\n");
- ret_val = -E1000_ERR_CONFIG;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_init_nvm_params - Initialize NVM function pointers
- * @hw: pointer to the HW structure
- *
- * This function initializes the function pointers for the NVM
- * set of functions. Called by drivers or by e1000_setup_init_funcs.
- **/
-s32 e1000_init_nvm_params(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- if (hw->nvm.ops.init_params) {
- ret_val = hw->nvm.ops.init_params(hw);
- if (ret_val) {
- DEBUGOUT("NVM Initialization Error\n");
- goto out;
- }
- } else {
- DEBUGOUT("nvm.init_nvm_params was NULL\n");
- ret_val = -E1000_ERR_CONFIG;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_init_phy_params - Initialize PHY function pointers
- * @hw: pointer to the HW structure
- *
- * This function initializes the function pointers for the PHY
- * set of functions. Called by drivers or by e1000_setup_init_funcs.
- **/
-s32 e1000_init_phy_params(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- if (hw->phy.ops.init_params) {
- ret_val = hw->phy.ops.init_params(hw);
- if (ret_val) {
- DEBUGOUT("PHY Initialization Error\n");
- goto out;
- }
- } else {
- DEBUGOUT("phy.init_phy_params was NULL\n");
- ret_val = -E1000_ERR_CONFIG;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_init_mbx_params - Initialize mailbox function pointers
- * @hw: pointer to the HW structure
- *
- * This function initializes the function pointers for the mailbox
- * set of functions. Called by drivers or by e1000_setup_init_funcs.
- **/
-s32 e1000_init_mbx_params(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
-
- if (hw->mbx.ops.init_params) {
- ret_val = hw->mbx.ops.init_params(hw);
- if (ret_val) {
- DEBUGOUT("Mailbox Initialization Error\n");
- goto out;
- }
- } else {
- DEBUGOUT("mbx.init_mbx_params was NULL\n");
- ret_val = -E1000_ERR_CONFIG;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_set_mac_type - Sets MAC type
- * @hw: pointer to the HW structure
- *
- * This function sets the mac type of the adapter based on the
- * device ID stored in the hw structure.
- * MUST BE FIRST FUNCTION CALLED (explicitly or through
- * e1000_setup_init_funcs()).
- **/
-s32 e1000_set_mac_type(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_set_mac_type");
-
- switch (hw->device_id) {
- case E1000_DEV_ID_82575EB_COPPER:
- case E1000_DEV_ID_82575EB_FIBER_SERDES:
- case E1000_DEV_ID_82575GB_QUAD_COPPER:
- mac->type = e1000_82575;
- break;
- case E1000_DEV_ID_82576:
- case E1000_DEV_ID_82576_FIBER:
- case E1000_DEV_ID_82576_SERDES:
- case E1000_DEV_ID_82576_QUAD_COPPER:
- case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
- case E1000_DEV_ID_82576_NS:
- case E1000_DEV_ID_82576_NS_SERDES:
- case E1000_DEV_ID_82576_SERDES_QUAD:
- mac->type = e1000_82576;
- break;
- case E1000_DEV_ID_82580_COPPER:
- case E1000_DEV_ID_82580_FIBER:
- case E1000_DEV_ID_82580_SERDES:
- case E1000_DEV_ID_82580_SGMII:
- case E1000_DEV_ID_82580_COPPER_DUAL:
- case E1000_DEV_ID_82580_QUAD_FIBER:
- case E1000_DEV_ID_DH89XXCC_SGMII:
- case E1000_DEV_ID_DH89XXCC_SERDES:
- case E1000_DEV_ID_DH89XXCC_BACKPLANE:
- case E1000_DEV_ID_DH89XXCC_SFP:
- mac->type = e1000_82580;
- break;
- case E1000_DEV_ID_I350_COPPER:
- case E1000_DEV_ID_I350_FIBER:
- case E1000_DEV_ID_I350_SERDES:
- case E1000_DEV_ID_I350_SGMII:
- case E1000_DEV_ID_I350_DA4:
- mac->type = e1000_i350;
- break;
- case E1000_DEV_ID_I210_COPPER_FLASHLESS:
- case E1000_DEV_ID_I210_SERDES_FLASHLESS:
- case E1000_DEV_ID_I210_COPPER:
- case E1000_DEV_ID_I210_COPPER_OEM1:
- case E1000_DEV_ID_I210_COPPER_IT:
- case E1000_DEV_ID_I210_FIBER:
- case E1000_DEV_ID_I210_SERDES:
- case E1000_DEV_ID_I210_SGMII:
- mac->type = e1000_i210;
- break;
- case E1000_DEV_ID_I211_COPPER:
- mac->type = e1000_i211;
- break;
-
- case E1000_DEV_ID_I354_BACKPLANE_1GBPS:
- case E1000_DEV_ID_I354_SGMII:
- case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS:
- mac->type = e1000_i354;
- break;
- default:
- /* Should never have loaded on this device */
- ret_val = -E1000_ERR_MAC_INIT;
- break;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_setup_init_funcs - Initializes function pointers
- * @hw: pointer to the HW structure
- * @init_device: true will initialize the rest of the function pointers
- * getting the device ready for use. false will only set
- * MAC type and the function pointers for the other init
- * functions. Passing false will not generate any hardware
- * reads or writes.
- *
- * This function must be called by a driver in order to use the rest
- * of the 'shared' code files. Called by drivers only.
- **/
-s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
-{
- s32 ret_val;
-
- /* Can't do much good without knowing the MAC type. */
- ret_val = e1000_set_mac_type(hw);
- if (ret_val) {
- DEBUGOUT("ERROR: MAC type could not be set properly.\n");
- goto out;
- }
-
- if (!hw->hw_addr) {
- DEBUGOUT("ERROR: Registers not mapped\n");
- ret_val = -E1000_ERR_CONFIG;
- goto out;
- }
-
- /*
- * Init function pointers to generic implementations. We do this first
- * allowing a driver module to override it afterward.
- */
- e1000_init_mac_ops_generic(hw);
- e1000_init_phy_ops_generic(hw);
- e1000_init_nvm_ops_generic(hw);
- e1000_init_mbx_ops_generic(hw);
-
- /*
- * Set up the init function pointers. These are functions within the
- * adapter family file that set up function pointers for the rest of
- * the functions in that family.
- */
- switch (hw->mac.type) {
- case e1000_82575:
- case e1000_82576:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- e1000_init_function_pointers_82575(hw);
- break;
- case e1000_i210:
- case e1000_i211:
- e1000_init_function_pointers_i210(hw);
- break;
- default:
- DEBUGOUT("Hardware not supported\n");
- ret_val = -E1000_ERR_CONFIG;
- break;
- }
-
- /*
- * Initialize the rest of the function pointers. These require some
- * register reads/writes in some cases.
- */
- if (!(ret_val) && init_device) {
- ret_val = e1000_init_mac_params(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_init_nvm_params(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_init_phy_params(hw);
- if (ret_val)
- goto out;
-
- ret_val = e1000_init_mbx_params(hw);
- if (ret_val)
- goto out;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_get_bus_info - Obtain bus information for adapter
- * @hw: pointer to the HW structure
- *
- * This will obtain information about the HW bus for which the
- * adapter is attached and stores it in the hw structure. This is a
- * function pointer entry point called by drivers.
- **/
-s32 e1000_get_bus_info(struct e1000_hw *hw)
-{
- if (hw->mac.ops.get_bus_info)
- return hw->mac.ops.get_bus_info(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_clear_vfta - Clear VLAN filter table
- * @hw: pointer to the HW structure
- *
- * This clears the VLAN filter table on the adapter. This is a function
- * pointer entry point called by drivers.
- **/
-void e1000_clear_vfta(struct e1000_hw *hw)
-{
- if (hw->mac.ops.clear_vfta)
- hw->mac.ops.clear_vfta(hw);
-}
-
-/**
- * e1000_write_vfta - Write value to VLAN filter table
- * @hw: pointer to the HW structure
- * @offset: the 32-bit offset at which to write the value.
- * @value: the 32-bit value to write at location offset.
- *
- * This writes a 32-bit value to a 32-bit offset in the VLAN filter
- * table. This is a function pointer entry point called by drivers.
- **/
-void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
-{
- if (hw->mac.ops.write_vfta)
- hw->mac.ops.write_vfta(hw, offset, value);
-}
-
-/**
- * e1000_update_mc_addr_list - Update Multicast addresses
- * @hw: pointer to the HW structure
- * @mc_addr_list: array of multicast addresses to program
- * @mc_addr_count: number of multicast addresses to program
- *
- * Updates the Multicast Table Array.
- * The caller must have a packed mc_addr_list of multicast addresses.
- **/
-void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count)
-{
- if (hw->mac.ops.update_mc_addr_list)
- hw->mac.ops.update_mc_addr_list(hw, mc_addr_list,
- mc_addr_count);
-}
-
-/**
- * e1000_force_mac_fc - Force MAC flow control
- * @hw: pointer to the HW structure
- *
- * Force the MAC's flow control settings. Currently no func pointer exists
- * and all implementations are handled in the generic version of this
- * function.
- **/
-s32 e1000_force_mac_fc(struct e1000_hw *hw)
-{
- return e1000_force_mac_fc_generic(hw);
-}
-
-/**
- * e1000_check_for_link - Check/Store link connection
- * @hw: pointer to the HW structure
- *
- * This checks the link condition of the adapter and stores the
- * results in the hw->mac structure. This is a function pointer entry
- * point called by drivers.
- **/
-s32 e1000_check_for_link(struct e1000_hw *hw)
-{
- if (hw->mac.ops.check_for_link)
- return hw->mac.ops.check_for_link(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_check_mng_mode - Check management mode
- * @hw: pointer to the HW structure
- *
- * This checks if the adapter has manageability enabled.
- * This is a function pointer entry point called by drivers.
- **/
-bool e1000_check_mng_mode(struct e1000_hw *hw)
-{
- if (hw->mac.ops.check_mng_mode)
- return hw->mac.ops.check_mng_mode(hw);
-
- return false;
-}
-
-/**
- * e1000_mng_write_dhcp_info - Writes DHCP info to host interface
- * @hw: pointer to the HW structure
- * @buffer: pointer to the host interface
- * @length: size of the buffer
- *
- * Writes the DHCP information to the host interface.
- **/
-s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length)
-{
- return e1000_mng_write_dhcp_info_generic(hw, buffer, length);
-}
-
-/**
- * e1000_reset_hw - Reset hardware
- * @hw: pointer to the HW structure
- *
- * This resets the hardware into a known state. This is a function pointer
- * entry point called by drivers.
- **/
-s32 e1000_reset_hw(struct e1000_hw *hw)
-{
- if (hw->mac.ops.reset_hw)
- return hw->mac.ops.reset_hw(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_init_hw - Initialize hardware
- * @hw: pointer to the HW structure
- *
- * This inits the hardware readying it for operation. This is a function
- * pointer entry point called by drivers.
- **/
-s32 e1000_init_hw(struct e1000_hw *hw)
-{
- if (hw->mac.ops.init_hw)
- return hw->mac.ops.init_hw(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_setup_link - Configures link and flow control
- * @hw: pointer to the HW structure
- *
- * This configures link and flow control settings for the adapter. This
- * is a function pointer entry point called by drivers. While modules can
- * also call this, they probably call their own version of this function.
- **/
-s32 e1000_setup_link(struct e1000_hw *hw)
-{
- if (hw->mac.ops.setup_link)
- return hw->mac.ops.setup_link(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_get_speed_and_duplex - Returns current speed and duplex
- * @hw: pointer to the HW structure
- * @speed: pointer to a 16-bit value to store the speed
- * @duplex: pointer to a 16-bit value to store the duplex.
- *
- * This returns the speed and duplex of the adapter in the two 'out'
- * variables passed in. This is a function pointer entry point called
- * by drivers.
- **/
-s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
-{
- if (hw->mac.ops.get_link_up_info)
- return hw->mac.ops.get_link_up_info(hw, speed, duplex);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_setup_led - Configures SW controllable LED
- * @hw: pointer to the HW structure
- *
- * This prepares the SW controllable LED for use and saves the current state
- * of the LED so it can be later restored. This is a function pointer entry
- * point called by drivers.
- **/
-s32 e1000_setup_led(struct e1000_hw *hw)
-{
- if (hw->mac.ops.setup_led)
- return hw->mac.ops.setup_led(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_cleanup_led - Restores SW controllable LED
- * @hw: pointer to the HW structure
- *
- * This restores the SW controllable LED to the value saved off by
- * e1000_setup_led. This is a function pointer entry point called by drivers.
- **/
-s32 e1000_cleanup_led(struct e1000_hw *hw)
-{
- if (hw->mac.ops.cleanup_led)
- return hw->mac.ops.cleanup_led(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_blink_led - Blink SW controllable LED
- * @hw: pointer to the HW structure
- *
- * This starts the adapter LED blinking. Request the LED to be setup first
- * and cleaned up after. This is a function pointer entry point called by
- * drivers.
- **/
-s32 e1000_blink_led(struct e1000_hw *hw)
-{
- if (hw->mac.ops.blink_led)
- return hw->mac.ops.blink_led(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_id_led_init - store LED configurations in SW
- * @hw: pointer to the HW structure
- *
- * Initializes the LED config in SW. This is a function pointer entry point
- * called by drivers.
- **/
-s32 e1000_id_led_init(struct e1000_hw *hw)
-{
- if (hw->mac.ops.id_led_init)
- return hw->mac.ops.id_led_init(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_led_on - Turn on SW controllable LED
- * @hw: pointer to the HW structure
- *
- * Turns the SW defined LED on. This is a function pointer entry point
- * called by drivers.
- **/
-s32 e1000_led_on(struct e1000_hw *hw)
-{
- if (hw->mac.ops.led_on)
- return hw->mac.ops.led_on(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_led_off - Turn off SW controllable LED
- * @hw: pointer to the HW structure
- *
- * Turns the SW defined LED off. This is a function pointer entry point
- * called by drivers.
- **/
-s32 e1000_led_off(struct e1000_hw *hw)
-{
- if (hw->mac.ops.led_off)
- return hw->mac.ops.led_off(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_reset_adaptive - Reset adaptive IFS
- * @hw: pointer to the HW structure
- *
- * Resets the adaptive IFS. Currently no func pointer exists and all
- * implementations are handled in the generic version of this function.
- **/
-void e1000_reset_adaptive(struct e1000_hw *hw)
-{
- e1000_reset_adaptive_generic(hw);
-}
-
-/**
- * e1000_update_adaptive - Update adaptive IFS
- * @hw: pointer to the HW structure
- *
- * Updates adapter IFS. Currently no func pointer exists and all
- * implementations are handled in the generic version of this function.
- **/
-void e1000_update_adaptive(struct e1000_hw *hw)
-{
- e1000_update_adaptive_generic(hw);
-}
-
-/**
- * e1000_disable_pcie_master - Disable PCI-Express master access
- * @hw: pointer to the HW structure
- *
- * Disables PCI-Express master access and verifies there are no pending
- * requests. Currently no func pointer exists and all implementations are
- * handled in the generic version of this function.
- **/
-s32 e1000_disable_pcie_master(struct e1000_hw *hw)
-{
- return e1000_disable_pcie_master_generic(hw);
-}
-
-/**
- * e1000_config_collision_dist - Configure collision distance
- * @hw: pointer to the HW structure
- *
- * Configures the collision distance to the default value and is used
- * during link setup.
- **/
-void e1000_config_collision_dist(struct e1000_hw *hw)
-{
- if (hw->mac.ops.config_collision_dist)
- hw->mac.ops.config_collision_dist(hw);
-}
-
-/**
- * e1000_rar_set - Sets a receive address register
- * @hw: pointer to the HW structure
- * @addr: address to set the RAR to
- * @index: the RAR to set
- *
- * Sets a Receive Address Register (RAR) to the specified address.
- **/
-void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
-{
- if (hw->mac.ops.rar_set)
- hw->mac.ops.rar_set(hw, addr, index);
-}
-
-/**
- * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state
- * @hw: pointer to the HW structure
- *
- * Ensures that the MDI/MDIX SW state is valid.
- **/
-s32 e1000_validate_mdi_setting(struct e1000_hw *hw)
-{
- if (hw->mac.ops.validate_mdi_setting)
- return hw->mac.ops.validate_mdi_setting(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_hash_mc_addr - Determines address location in multicast table
- * @hw: pointer to the HW structure
- * @mc_addr: Multicast address to hash.
- *
- * This hashes an address to determine its location in the multicast
- * table. Currently no func pointer exists and all implementations
- * are handled in the generic version of this function.
- **/
-u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
-{
- return e1000_hash_mc_addr_generic(hw, mc_addr);
-}
-
-/**
- * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX
- * @hw: pointer to the HW structure
- *
- * Enables packet filtering on transmit packets if manageability is enabled
- * and host interface is enabled.
- * Currently no func pointer exists and all implementations are handled in the
- * generic version of this function.
- **/
-bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw)
-{
- return e1000_enable_tx_pkt_filtering_generic(hw);
-}
-
-/**
- * e1000_mng_host_if_write - Writes to the manageability host interface
- * @hw: pointer to the HW structure
- * @buffer: pointer to the host interface buffer
- * @length: size of the buffer
- * @offset: location in the buffer to write to
- * @sum: sum of the data (not checksum)
- *
- * This function writes the buffer content at the given offset on the host
- * interface, handling alignment so the writes are done efficiently, and
- * accumulates the sum of the data in the *sum parameter.
- **/
-s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
- u16 offset, u8 *sum)
-{
- return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum);
-}
-
-/**
- * e1000_mng_write_cmd_header - Writes manageability command header
- * @hw: pointer to the HW structure
- * @hdr: pointer to the host interface command header
- *
- * Writes the command header after performing the checksum calculation.
- **/
-s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
- struct e1000_host_mng_command_header *hdr)
-{
- return e1000_mng_write_cmd_header_generic(hw, hdr);
-}
-
-/**
- * e1000_mng_enable_host_if - Checks host interface is enabled
- * @hw: pointer to the HW structure
- *
- * Returns E1000_SUCCESS upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
- *
- * This function checks whether the host interface is enabled for command
- * operation and whether the previous command has completed. It busy-waits
- * if the previous command has not completed.
- **/
-s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
-{
- return e1000_mng_enable_host_if_generic(hw);
-}
-
-/**
- * e1000_check_reset_block - Verifies PHY can be reset
- * @hw: pointer to the HW structure
- *
- * Checks if the PHY is in a state that can be reset or if manageability
- * has it tied up. This is a function pointer entry point called by drivers.
- **/
-s32 e1000_check_reset_block(struct e1000_hw *hw)
-{
- if (hw->phy.ops.check_reset_block)
- return hw->phy.ops.check_reset_block(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_phy_reg - Reads PHY register
- * @hw: pointer to the HW structure
- * @offset: the register to read
- * @data: the buffer to store the 16-bit read.
- *
- * Reads the PHY register and returns the value in data.
- * This is a function pointer entry point called by drivers.
- **/
-s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- if (hw->phy.ops.read_reg)
- return hw->phy.ops.read_reg(hw, offset, data);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_phy_reg - Writes PHY register
- * @hw: pointer to the HW structure
- * @offset: the register to write
- * @data: the value to write.
- *
- * Writes the PHY register at offset with the value in data.
- * This is a function pointer entry point called by drivers.
- **/
-s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data)
-{
- if (hw->phy.ops.write_reg)
- return hw->phy.ops.write_reg(hw, offset, data);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_release_phy - Generic release PHY
- * @hw: pointer to the HW structure
- *
- * Returns immediately if the silicon family does not require a semaphore
- * when accessing the PHY.
- **/
-void e1000_release_phy(struct e1000_hw *hw)
-{
- if (hw->phy.ops.release)
- hw->phy.ops.release(hw);
-}
-
-/**
- * e1000_acquire_phy - Generic acquire PHY
- * @hw: pointer to the HW structure
- *
- * Returns success if the silicon family does not require a semaphore when
- * accessing the PHY.
- **/
-s32 e1000_acquire_phy(struct e1000_hw *hw)
-{
- if (hw->phy.ops.acquire)
- return hw->phy.ops.acquire(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_kmrn_reg - Reads register using Kumeran interface
- * @hw: pointer to the HW structure
- * @offset: the register to read
- * @data: the location to store the 16-bit value read.
- *
- * Reads a register out of the Kumeran interface. Currently no func pointer
- * exists and all implementations are handled in the generic version of
- * this function.
- **/
-s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- return e1000_read_kmrn_reg_generic(hw, offset, data);
-}
-
-/**
- * e1000_write_kmrn_reg - Writes register using Kumeran interface
- * @hw: pointer to the HW structure
- * @offset: the register to write
- * @data: the value to write.
- *
- * Writes a register to the Kumeran interface. Currently no func pointer
- * exists and all implementations are handled in the generic version of
- * this function.
- **/
-s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data)
-{
- return e1000_write_kmrn_reg_generic(hw, offset, data);
-}
-
-/**
- * e1000_get_cable_length - Retrieves cable length estimation
- * @hw: pointer to the HW structure
- *
- * This function estimates the cable length and stores the result in
- * hw->phy.min_length and hw->phy.max_length. This is a function pointer
- * entry point called by drivers.
- **/
-s32 e1000_get_cable_length(struct e1000_hw *hw)
-{
- if (hw->phy.ops.get_cable_length)
- return hw->phy.ops.get_cable_length(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_phy_info - Retrieves PHY information from registers
- * @hw: pointer to the HW structure
- *
- * This function gets some information from various PHY registers and
- * populates hw->phy values with it. This is a function pointer entry
- * point called by drivers.
- **/
-s32 e1000_get_phy_info(struct e1000_hw *hw)
-{
- if (hw->phy.ops.get_info)
- return hw->phy.ops.get_info(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_hw_reset - Hard PHY reset
- * @hw: pointer to the HW structure
- *
- * Performs a hard PHY reset. This is a function pointer entry point called
- * by drivers.
- **/
-s32 e1000_phy_hw_reset(struct e1000_hw *hw)
-{
- if (hw->phy.ops.reset)
- return hw->phy.ops.reset(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_commit - Soft PHY reset
- * @hw: pointer to the HW structure
- *
- * Performs a soft PHY reset on those that apply. This is a function pointer
- * entry point called by drivers.
- **/
-s32 e1000_phy_commit(struct e1000_hw *hw)
-{
- if (hw->phy.ops.commit)
- return hw->phy.ops.commit(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_set_d0_lplu_state - Sets low power link up state for D0
- * @hw: pointer to the HW structure
- * @active: boolean used to enable/disable lplu
- *
- * Success returns 0, Failure returns 1
- *
- * The low power link up (lplu) state is set to the power management level D0
- * and SmartSpeed is disabled when active is true, else clear lplu for D0
- * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
- * is used during Dx states where the power conservation is most important.
- * During driver activity, SmartSpeed should be enabled so performance is
- * maintained. This is a function pointer entry point called by drivers.
- **/
-s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active)
-{
- if (hw->phy.ops.set_d0_lplu_state)
- return hw->phy.ops.set_d0_lplu_state(hw, active);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_set_d3_lplu_state - Sets low power link up state for D3
- * @hw: pointer to the HW structure
- * @active: boolean used to enable/disable lplu
- *
- * Success returns 0, Failure returns 1
- *
- * The low power link up (lplu) state is set to the power management level D3
- * and SmartSpeed is disabled when active is true, else clear lplu for D3
- * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU
- * is used during Dx states where the power conservation is most important.
- * During driver activity, SmartSpeed should be enabled so performance is
- * maintained. This is a function pointer entry point called by drivers.
- **/
-s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
-{
- if (hw->phy.ops.set_d3_lplu_state)
- return hw->phy.ops.set_d3_lplu_state(hw, active);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_mac_addr - Reads MAC address
- * @hw: pointer to the HW structure
- *
- * Reads the MAC address out of the adapter and stores it in the HW structure.
- * Currently no func pointer exists and all implementations are handled in the
- * generic version of this function.
- **/
-s32 e1000_read_mac_addr(struct e1000_hw *hw)
-{
- if (hw->mac.ops.read_mac_addr)
- return hw->mac.ops.read_mac_addr(hw);
-
- return e1000_read_mac_addr_generic(hw);
-}
-
-/**
- * e1000_read_pba_string - Read device part number string
- * @hw: pointer to the HW structure
- * @pba_num: pointer to device part number
- * @pba_num_size: size of part number buffer
- *
- * Reads the product board assembly (PBA) number from the EEPROM and stores
- * the value in pba_num.
- * Currently no func pointer exists and all implementations are handled in the
- * generic version of this function.
- **/
-s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size)
-{
- return e1000_read_pba_string_generic(hw, pba_num, pba_num_size);
-}
-
-/**
- * e1000_read_pba_length - Read device part number string length
- * @hw: pointer to the HW structure
- * @pba_num_size: size of part number buffer
- *
- * Reads the product board assembly (PBA) number length from the EEPROM and
- * stores the value in pba_num_size.
- * Currently no func pointer exists and all implementations are handled in the
- * generic version of this function.
- **/
-s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size)
-{
- return e1000_read_pba_length_generic(hw, pba_num_size);
-}
-
-/**
- * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum
- * @hw: pointer to the HW structure
- *
- * Validates the NVM checksum is correct. This is a function pointer entry
- * point called by drivers.
- **/
-s32 e1000_validate_nvm_checksum(struct e1000_hw *hw)
-{
- if (hw->nvm.ops.validate)
- return hw->nvm.ops.validate(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum
- * @hw: pointer to the HW structure
- *
- * Updates the NVM checksum. Currently no func pointer exists and all
- * implementations are handled in the generic version of this function.
- **/
-s32 e1000_update_nvm_checksum(struct e1000_hw *hw)
-{
- if (hw->nvm.ops.update)
- return hw->nvm.ops.update(hw);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_reload_nvm - Reloads EEPROM
- * @hw: pointer to the HW structure
- *
- * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
- * extended control register.
- **/
-void e1000_reload_nvm(struct e1000_hw *hw)
-{
- if (hw->nvm.ops.reload)
- hw->nvm.ops.reload(hw);
-}
-
-/**
- * e1000_read_nvm - Reads NVM (EEPROM)
- * @hw: pointer to the HW structure
- * @offset: the word offset to read
- * @words: number of 16-bit words to read
- * @data: pointer to the properly sized buffer for the data.
- *
- * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function
- * pointer entry point called by drivers.
- **/
-s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
-{
- if (hw->nvm.ops.read)
- return hw->nvm.ops.read(hw, offset, words, data);
-
- return -E1000_ERR_CONFIG;
-}
-
-/**
- * e1000_write_nvm - Writes to NVM (EEPROM)
- * @hw: pointer to the HW structure
- * @offset: the word offset to write
- * @words: number of 16-bit words to write
- * @data: pointer to the properly sized buffer for the data.
- *
- * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function
- * pointer entry point called by drivers.
- **/
-s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
-{
- if (hw->nvm.ops.write)
- return hw->nvm.ops.write(hw, offset, words, data);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_8bit_ctrl_reg - Writes 8bit Control register
- * @hw: pointer to the HW structure
- * @reg: 32bit register offset
- * @offset: the register to write
- * @data: the value to write.
- *
- * Writes the PHY register at offset with the value in data.
- * This is a function pointer entry point called by drivers.
- **/
-s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
- u8 data)
-{
- return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data);
-}
-
-/**
- * e1000_power_up_phy - Restores link in case of PHY power down
- * @hw: pointer to the HW structure
- *
- * The PHY may be powered down to save power, to turn off link when the
- * driver is unloaded, or when wake-on-LAN is not enabled (among other
- * reasons).
- **/
-void e1000_power_up_phy(struct e1000_hw *hw)
-{
- if (hw->phy.ops.power_up)
- hw->phy.ops.power_up(hw);
-
- e1000_setup_link(hw);
-}
-
-/**
- * e1000_power_down_phy - Power down PHY
- * @hw: pointer to the HW structure
- *
- * The PHY may be powered down to save power, to turn off link when the
- * driver is unloaded, or when wake-on-LAN is not enabled (among other
- * reasons).
- **/
-void e1000_power_down_phy(struct e1000_hw *hw)
-{
- if (hw->phy.ops.power_down)
- hw->phy.ops.power_down(hw);
-}
-
-/**
- * e1000_power_up_fiber_serdes_link - Power up serdes link
- * @hw: pointer to the HW structure
- *
- * Power on the optics and PCS.
- **/
-void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw)
-{
- if (hw->mac.ops.power_up_serdes)
- hw->mac.ops.power_up_serdes(hw);
-}
-
-/**
- * e1000_shutdown_fiber_serdes_link - Remove link during power down
- * @hw: pointer to the HW structure
- *
- * Shutdown the optics and PCS on driver unload.
- **/
-void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw)
-{
- if (hw->mac.ops.shutdown_serdes)
- hw->mac.ops.shutdown_serdes(hw);
-}
-
-/**
- * e1000_get_thermal_sensor_data - Gathers thermal sensor data
- * @hw: pointer to hardware structure
- *
- * Updates the temperatures in mac.thermal_sensor_data
- **/
-s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw)
-{
- if (hw->mac.ops.get_thermal_sensor_data)
- return hw->mac.ops.get_thermal_sensor_data(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds
- * @hw: pointer to hardware structure
- *
- * Sets the thermal sensor thresholds according to the NVM map
- **/
-s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw)
-{
- if (hw->mac.ops.init_thermal_sensor_thresh)
- return hw->mac.ops.init_thermal_sensor_thresh(hw);
-
- return E1000_SUCCESS;
-}
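The file deleted above is a thin dispatch layer: each entry point checks the per-family function pointer installed by e1000_setup_init_funcs() and otherwise falls back to a generic implementation or an error code. The sketch below is hypothetical (example_hw_bringup() is not part of the driver) and only restates the ordering the comments above describe, assuming e1000_api.h is included.

static s32 example_hw_bringup(struct e1000_hw *hw)
{
        s32 ret;

        /* Must run first: resolves hw->mac.type and installs the ops
         * tables that every later call dispatches through.
         */
        ret = e1000_setup_init_funcs(hw, true);
        if (ret)
                return ret;

        /* Dispatches through mac.ops.get_bus_info when it is set. */
        ret = e1000_get_bus_info(hw);
        if (ret)
                return ret;

        /* Uses mac.ops.read_mac_addr if set, else the generic version. */
        return e1000_read_mac_addr(hw);
}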
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
deleted file mode 100644
index 0bc00acd..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_API_H_
-#define _E1000_API_H_
-
-#include "e1000_hw.h"
-
-extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
-extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw);
-extern void e1000_init_function_pointers_vf(struct e1000_hw *hw);
-extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw);
-extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw);
-extern void e1000_init_function_pointers_i210(struct e1000_hw *hw);
-
-s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr);
-s32 e1000_set_mac_type(struct e1000_hw *hw);
-s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device);
-s32 e1000_init_mac_params(struct e1000_hw *hw);
-s32 e1000_init_nvm_params(struct e1000_hw *hw);
-s32 e1000_init_phy_params(struct e1000_hw *hw);
-s32 e1000_init_mbx_params(struct e1000_hw *hw);
-s32 e1000_get_bus_info(struct e1000_hw *hw);
-void e1000_clear_vfta(struct e1000_hw *hw);
-void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value);
-s32 e1000_force_mac_fc(struct e1000_hw *hw);
-s32 e1000_check_for_link(struct e1000_hw *hw);
-s32 e1000_reset_hw(struct e1000_hw *hw);
-s32 e1000_init_hw(struct e1000_hw *hw);
-s32 e1000_setup_link(struct e1000_hw *hw);
-s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex);
-s32 e1000_disable_pcie_master(struct e1000_hw *hw);
-void e1000_config_collision_dist(struct e1000_hw *hw);
-void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index);
-u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
-void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count);
-s32 e1000_setup_led(struct e1000_hw *hw);
-s32 e1000_cleanup_led(struct e1000_hw *hw);
-s32 e1000_check_reset_block(struct e1000_hw *hw);
-s32 e1000_blink_led(struct e1000_hw *hw);
-s32 e1000_led_on(struct e1000_hw *hw);
-s32 e1000_led_off(struct e1000_hw *hw);
-s32 e1000_id_led_init(struct e1000_hw *hw);
-void e1000_reset_adaptive(struct e1000_hw *hw);
-void e1000_update_adaptive(struct e1000_hw *hw);
-s32 e1000_get_cable_length(struct e1000_hw *hw);
-s32 e1000_validate_mdi_setting(struct e1000_hw *hw);
-s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
- u8 data);
-s32 e1000_get_phy_info(struct e1000_hw *hw);
-void e1000_release_phy(struct e1000_hw *hw);
-s32 e1000_acquire_phy(struct e1000_hw *hw);
-s32 e1000_phy_hw_reset(struct e1000_hw *hw);
-s32 e1000_phy_commit(struct e1000_hw *hw);
-void e1000_power_up_phy(struct e1000_hw *hw);
-void e1000_power_down_phy(struct e1000_hw *hw);
-s32 e1000_read_mac_addr(struct e1000_hw *hw);
-s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size);
-s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size);
-void e1000_reload_nvm(struct e1000_hw *hw);
-s32 e1000_update_nvm_checksum(struct e1000_hw *hw);
-s32 e1000_validate_nvm_checksum(struct e1000_hw *hw);
-s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
-s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
-s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active);
-s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active);
-bool e1000_check_mng_mode(struct e1000_hw *hw);
-bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw);
-s32 e1000_mng_enable_host_if(struct e1000_hw *hw);
-s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
- u16 offset, u8 *sum);
-s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
- struct e1000_host_mng_command_header *hdr);
-s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length);
-s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw);
-s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw);
-
-
-
-/*
- * TBI_ACCEPT macro definition:
- *
- * This macro requires:
- * adapter = a pointer to struct e1000_hw
- * status = the 8 bit status field of the Rx descriptor with EOP set
- * errors = the 8 bit error field of the Rx descriptor with EOP set
- * length = the sum of all the length fields of the Rx descriptors that
- * make up the current frame
- * last_byte = the last byte of the frame DMAed by the hardware
- * max_frame_size = the maximum frame length we want to accept.
- * min_frame_size = the minimum frame length we want to accept.
- *
- * This macro is a conditional that should be used in the interrupt
- * handler's Rx processing routine when RxErrors have been detected.
- *
- * Typical use:
- * ...
- * if (TBI_ACCEPT) {
- * accept_frame = true;
- * e1000_tbi_adjust_stats(adapter, MacAddress);
- * frame_length--;
- * } else {
- * accept_frame = false;
- * }
- * ...
- */
-
-/* The carrier extension symbol, as received by the NIC. */
-#define CARRIER_EXTENSION 0x0F
-
-#define TBI_ACCEPT(a, status, errors, length, last_byte, \
- min_frame_size, max_frame_size) \
- (e1000_tbi_sbp_enabled_82543(a) && \
- (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
- ((last_byte) == CARRIER_EXTENSION) && \
- (((status) & E1000_RXD_STAT_VP) ? \
- (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \
- ((length) <= (max_frame_size + 1))) : \
- (((length) > min_frame_size) && \
- ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1)))))
-
-#ifndef E1000_MAX
-#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b))
-#endif
-#ifndef E1000_DIVIDE_ROUND_UP
-#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */
-#endif
-#endif /* _E1000_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
deleted file mode 100644
index b39aaf80..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
+++ /dev/null
@@ -1,1365 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_DEFINES_H_
-#define _E1000_DEFINES_H_
-
-/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define REQ_TX_DESCRIPTOR_MULTIPLE 8
-#define REQ_RX_DESCRIPTOR_MULTIPLE 8
-
-/* Definitions for power management and wakeup registers */
-/* Wake Up Control */
-#define E1000_WUC_APME 0x00000001 /* APM Enable */
-#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */
-#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */
-#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */
-#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */
-
-/* Wake Up Filter Control */
-#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
-#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
-#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
-#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
-#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
-#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
-#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
-#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
-
-/* Wake Up Status */
-#define E1000_WUS_LNKC E1000_WUFC_LNKC
-#define E1000_WUS_MAG E1000_WUFC_MAG
-#define E1000_WUS_EX E1000_WUFC_EX
-#define E1000_WUS_MC E1000_WUFC_MC
-#define E1000_WUS_BC E1000_WUFC_BC
-
-/* Extended Device Control */
-#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */
-#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */
-#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */
-#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */
-#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */
-#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
-/* Physical Func Reset Done Indication */
-#define E1000_CTRL_EXT_PFRSTD 0x00004000
-#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */
-#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */
-#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */
-#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
-/* Offset of the link mode field in Ctrl Ext register */
-#define E1000_CTRL_EXT_LINK_MODE_OFFSET 22
-#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000
-#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000
-#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000
-#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000
-#define E1000_CTRL_EXT_EIAME 0x01000000
-#define E1000_CTRL_EXT_IRCA 0x00000001
-#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
-#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */
-#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */
-#define E1000_I2CCMD_REG_ADDR_SHIFT 16
-#define E1000_I2CCMD_PHY_ADDR_SHIFT 24
-#define E1000_I2CCMD_OPCODE_READ 0x08000000
-#define E1000_I2CCMD_OPCODE_WRITE 0x00000000
-#define E1000_I2CCMD_READY 0x20000000
-#define E1000_I2CCMD_ERROR 0x80000000
-#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a))
-#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a))
-#define E1000_MAX_SGMII_PHY_REG_ADDR 255
-#define E1000_I2CCMD_PHY_TIMEOUT 200
-#define E1000_IVAR_VALID 0x80
-#define E1000_GPIE_NSICR 0x00000001
-#define E1000_GPIE_MSIX_MODE 0x00000010
-#define E1000_GPIE_EIAME 0x40000000
-#define E1000_GPIE_PBA 0x80000000
-
-/* Receive Descriptor bit definitions */
-#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
-#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
-#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */
-#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
-#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
-#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
-#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
-#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */
-#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */
-#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
-#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */
-#define E1000_RXD_ERR_CE 0x01 /* CRC Error */
-#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */
-#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
-#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */
-#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */
-#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */
-#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
-#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
-
-#define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */
-#define E1000_RXDEXT_STATERR_LB 0x00040000
-#define E1000_RXDEXT_STATERR_CE 0x01000000
-#define E1000_RXDEXT_STATERR_SE 0x02000000
-#define E1000_RXDEXT_STATERR_SEQ 0x04000000
-#define E1000_RXDEXT_STATERR_CXE 0x10000000
-#define E1000_RXDEXT_STATERR_TCPE 0x20000000
-#define E1000_RXDEXT_STATERR_IPE 0x40000000
-#define E1000_RXDEXT_STATERR_RXE 0x80000000
-
-/* mask to determine if packets should be dropped due to frame errors */
-#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
- E1000_RXD_ERR_CE | \
- E1000_RXD_ERR_SE | \
- E1000_RXD_ERR_SEQ | \
- E1000_RXD_ERR_CXE | \
- E1000_RXD_ERR_RXE)
-
-/* Same mask, but for extended and packet split descriptors */
-#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
- E1000_RXDEXT_STATERR_CE | \
- E1000_RXDEXT_STATERR_SE | \
- E1000_RXDEXT_STATERR_SEQ | \
- E1000_RXDEXT_STATERR_CXE | \
- E1000_RXDEXT_STATERR_RXE)
-
-#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000
-#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000
-#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000
-#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000
-#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000
-#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000
-
-#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000
-
-/* Management Control */
-#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */
-#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */
-#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */
-#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
-#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */
-/* Enable MAC address filtering */
-#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000
-/* Enable MNG packets to host memory */
-#define E1000_MANC_EN_MNG2HOST 0x00200000
-
-#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */
-#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */
-#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */
-#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */
-
-/* Receive Control */
-#define E1000_RCTL_RST 0x00000001 /* Software reset */
-#define E1000_RCTL_EN 0x00000002 /* enable */
-#define E1000_RCTL_SBP 0x00000004 /* store bad packet */
-#define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */
-#define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */
-#define E1000_RCTL_LPE 0x00000020 /* long packet enable */
-#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */
-#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
-#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
-#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */
-#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
-#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
-#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
-#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
-/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
-#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */
-#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */
-#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */
-#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */
-/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
-#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */
-#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */
-#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */
-#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */
-#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */
-#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */
-#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */
-#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
-#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
-#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
-
-/* Use byte values for the following shift parameters
- * Usage:
- * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
- * E1000_PSRCTL_BSIZE0_MASK) |
- * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
- * E1000_PSRCTL_BSIZE1_MASK) |
- * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
- * E1000_PSRCTL_BSIZE2_MASK) |
- * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) &
- * E1000_PSRCTL_BSIZE3_MASK))
- * where value0 = [128..16256], default=256
- * value1 = [1024..64512], default=4096
- * value2 = [0..64512], default=4096
- * value3 = [0..64512], default=0
- */
-
-#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F
-#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00
-#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000
-#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000
-
-#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */
-#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */
-#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */
-#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */
-
-/* SWFW_SYNC Definitions */
-#define E1000_SWFW_EEP_SM 0x01
-#define E1000_SWFW_PHY0_SM 0x02
-#define E1000_SWFW_PHY1_SM 0x04
-#define E1000_SWFW_CSR_SM 0x08
-#define E1000_SWFW_PHY2_SM 0x20
-#define E1000_SWFW_PHY3_SM 0x40
-#define E1000_SWFW_SW_MNG_SM 0x400
-
-/* Device Control */
-#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */
-#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */
-#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */
-#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */
-#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */
-#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */
-#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */
-#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */
-#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */
-#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */
-#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */
-#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */
-#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */
-#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
-#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
-#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */
-#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */
-#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */
-#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */
-#define E1000_CTRL_RST 0x04000000 /* Global reset */
-#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */
-#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */
-#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */
-#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */
-#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */
-
-
-#define E1000_CONNSW_ENRGSRC 0x4
-#define E1000_CONNSW_PHYSD 0x400
-#define E1000_CONNSW_PHY_PDN 0x800
-#define E1000_CONNSW_SERDESD 0x200
-#define E1000_CONNSW_AUTOSENSE_CONF 0x2
-#define E1000_CONNSW_AUTOSENSE_EN 0x1
-#define E1000_PCS_CFG_PCS_EN 8
-#define E1000_PCS_LCTL_FLV_LINK_UP 1
-#define E1000_PCS_LCTL_FSV_10 0
-#define E1000_PCS_LCTL_FSV_100 2
-#define E1000_PCS_LCTL_FSV_1000 4
-#define E1000_PCS_LCTL_FDV_FULL 8
-#define E1000_PCS_LCTL_FSD 0x10
-#define E1000_PCS_LCTL_FORCE_LINK 0x20
-#define E1000_PCS_LCTL_FORCE_FCTRL 0x80
-#define E1000_PCS_LCTL_AN_ENABLE 0x10000
-#define E1000_PCS_LCTL_AN_RESTART 0x20000
-#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000
-#define E1000_ENABLE_SERDES_LOOPBACK 0x0410
-
-#define E1000_PCS_LSTS_LINK_OK 1
-#define E1000_PCS_LSTS_SPEED_100 2
-#define E1000_PCS_LSTS_SPEED_1000 4
-#define E1000_PCS_LSTS_DUPLEX_FULL 8
-#define E1000_PCS_LSTS_SYNK_OK 0x10
-#define E1000_PCS_LSTS_AN_COMPLETE 0x10000
-
-/* Device Status */
-#define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */
-#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
-#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
-#define E1000_STATUS_FUNC_SHIFT 2
-#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
-#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
-#define E1000_STATUS_SPEED_MASK 0x000000C0
-#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */
-#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
-#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
-#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */
-#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */
-#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */
-#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */
-#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */
-
-#define SPEED_10 10
-#define SPEED_100 100
-#define SPEED_1000 1000
-#define SPEED_2500 2500
-#define HALF_DUPLEX 1
-#define FULL_DUPLEX 2
-
-
-#define ADVERTISE_10_HALF 0x0001
-#define ADVERTISE_10_FULL 0x0002
-#define ADVERTISE_100_HALF 0x0004
-#define ADVERTISE_100_FULL 0x0008
-#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */
-#define ADVERTISE_1000_FULL 0x0020
-
-/* 1000/H is not supported, nor spec-compliant. */
-#define E1000_ALL_SPEED_DUPLEX ( \
- ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
- ADVERTISE_100_FULL | ADVERTISE_1000_FULL)
-#define E1000_ALL_NOT_GIG ( \
- ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
- ADVERTISE_100_FULL)
-#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL)
-#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL)
-#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF)
-
-#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX
-
-/* LED Control */
-#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F
-#define E1000_LEDCTL_LED0_MODE_SHIFT 0
-#define E1000_LEDCTL_LED0_IVRT 0x00000040
-#define E1000_LEDCTL_LED0_BLINK 0x00000080
-
-#define E1000_LEDCTL_MODE_LED_ON 0xE
-#define E1000_LEDCTL_MODE_LED_OFF 0xF
-
-/* Transmit Descriptor bit definitions */
-#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */
-#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */
-#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
-#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
-#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */
-#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
-#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */
-#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
-#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
-#define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
-#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
-#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
-#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
-#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */
-#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */
-#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */
-#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */
-#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */
-#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
-#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */
-#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */
-
-/* Transmit Control */
-#define E1000_TCTL_EN 0x00000002 /* enable Tx */
-#define E1000_TCTL_PSP 0x00000008 /* pad short packets */
-#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */
-#define E1000_TCTL_COLD 0x003ff000 /* collision distance */
-#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
-#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */
-
-/* Transmit Arbitration Count */
-#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */
-
-/* SerDes Control */
-#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
-#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410
-
-/* Receive Checksum Control */
-#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */
-#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */
-#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */
-#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
-#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
-
-/* Header split receive */
-#define E1000_RFCTL_NFSW_DIS 0x00000040
-#define E1000_RFCTL_NFSR_DIS 0x00000080
-#define E1000_RFCTL_ACK_DIS 0x00001000
-#define E1000_RFCTL_EXTEN 0x00008000
-#define E1000_RFCTL_IPV6_EX_DIS 0x00010000
-#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
-#define E1000_RFCTL_LEF 0x00040000
-
-/* Collision related configuration parameters */
-#define E1000_COLLISION_THRESHOLD 15
-#define E1000_CT_SHIFT 4
-#define E1000_COLLISION_DISTANCE 63
-#define E1000_COLD_SHIFT 12
-
-/* Default values for the transmit IPG register */
-#define DEFAULT_82543_TIPG_IPGT_FIBER 9
-#define DEFAULT_82543_TIPG_IPGT_COPPER 8
-
-#define E1000_TIPG_IPGT_MASK 0x000003FF
-
-#define DEFAULT_82543_TIPG_IPGR1 8
-#define E1000_TIPG_IPGR1_SHIFT 10
-
-#define DEFAULT_82543_TIPG_IPGR2 6
-#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7
-#define E1000_TIPG_IPGR2_SHIFT 20
-
-/* Ethertype field values */
-#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */
-
-#define ETHERNET_FCS_SIZE 4
-#define MAX_JUMBO_FRAME_SIZE 0x3F00
-
-/* Extended Configuration Control and Size */
-#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020
-#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001
-#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008
-#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020
-#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080
-#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000
-#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16
-#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000
-#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16
-
-#define E1000_PHY_CTRL_D0A_LPLU 0x00000002
-#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004
-#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008
-#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040
-
-#define E1000_KABGTXD_BGSQLBIAS 0x00050000
-
-/* PBA constants */
-#define E1000_PBA_8K 0x0008 /* 8KB */
-#define E1000_PBA_10K 0x000A /* 10KB */
-#define E1000_PBA_12K 0x000C /* 12KB */
-#define E1000_PBA_14K 0x000E /* 14KB */
-#define E1000_PBA_16K 0x0010 /* 16KB */
-#define E1000_PBA_18K 0x0012
-#define E1000_PBA_20K 0x0014
-#define E1000_PBA_22K 0x0016
-#define E1000_PBA_24K 0x0018
-#define E1000_PBA_26K 0x001A
-#define E1000_PBA_30K 0x001E
-#define E1000_PBA_32K 0x0020
-#define E1000_PBA_34K 0x0022
-#define E1000_PBA_35K 0x0023
-#define E1000_PBA_38K 0x0026
-#define E1000_PBA_40K 0x0028
-#define E1000_PBA_48K 0x0030 /* 48KB */
-#define E1000_PBA_64K 0x0040 /* 64KB */
-
-#define E1000_PBA_RXA_MASK 0xFFFF
-
-#define E1000_PBS_16K E1000_PBA_16K
-
-#define IFS_MAX 80
-#define IFS_MIN 40
-#define IFS_RATIO 4
-#define IFS_STEP 10
-#define MIN_NUM_XMITS 1000
-
-/* SW Semaphore Register */
-#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
-#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
-#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */
-
-#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */
-
-/* Interrupt Cause Read */
-#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */
-#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */
-#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
-#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */
-#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
-#define E1000_ICR_RXO 0x00000040 /* Rx overrun */
-#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
-#define E1000_ICR_VMMB 0x00000100 /* VM MB event */
-#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */
-#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
-#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */
-#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */
-#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */
-#define E1000_ICR_TXD_LOW 0x00008000
-#define E1000_ICR_MNG 0x00040000 /* Manageability event */
-#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */
-#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */
-/* If this bit asserted, the driver should claim the interrupt */
-#define E1000_ICR_INT_ASSERTED 0x80000000
-#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */
-#define E1000_ICR_FER 0x00400000 /* Fatal Error */
-
-#define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/
-#define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */
-
-
-/* Extended Interrupt Cause Read */
-#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */
-#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */
-#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */
-#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */
-#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */
-#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */
-#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */
-#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */
-#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */
-#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */
-/* TCP Timer */
-#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */
-#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */
-#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */
-#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */
-
-/* This defines the bits that are set in the Interrupt Mask
- * Set/Read Register. Each bit is documented below:
- * o RXT0 = Receiver Timer Interrupt (ring 0)
- * o TXDW = Transmit Descriptor Written Back
- * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
- * o RXSEQ = Receive Sequence Error
- * o LSC = Link Status Change
- */
-#define IMS_ENABLE_MASK ( \
- E1000_IMS_RXT0 | \
- E1000_IMS_TXDW | \
- E1000_IMS_RXDMT0 | \
- E1000_IMS_RXSEQ | \
- E1000_IMS_LSC)
-
-/* Interrupt Mask Set */
-#define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */
-#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
-#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */
-#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */
-#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
-#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
-#define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */
-#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */
-#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW
-#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */
-#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */
-#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */
-#define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */
-
-#define E1000_IMS_THS E1000_ICR_THS /* ICR.THS: Thermal Sensor Event */
-#define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */
-/* Extended Interrupt Mask Set */
-#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
-#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
-#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
-#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
-#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
-#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
-#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
-#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
-#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */
-#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */
-
-/* Interrupt Cause Set */
-#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */
-#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
-#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
-
-/* Extended Interrupt Cause Set */
-#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
-#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
-#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
-#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
-#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
-#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
-#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
-#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
-#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */
-#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */
-
-#define E1000_EITR_ITR_INT_MASK 0x0000FFFF
-/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
-#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */
-#define E1000_EITR_INTERVAL 0x00007FFC
-
-/* Transmit Descriptor Control */
-#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */
-#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */
-#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */
-#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
-#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
-#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */
-/* Enable the counting of descriptors still to be processed. */
-#define E1000_TXDCTL_COUNT_DESC 0x00400000
-
-/* Flow Control Constants */
-#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
-#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
-#define FLOW_CONTROL_TYPE 0x8808
-
-/* 802.1q VLAN Packet Size */
-#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */
-#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */
-
-/* Receive Address
- * Number of high/low register pairs in the RAR. The RAR (Receive Address
- * Registers) holds the directed and multicast addresses that we monitor.
- * Technically, we have 16 spots. However, we reserve one of these spots
- * (RAR[15]) for our directed address used by controllers with
- * manageability enabled, allowing us room for 15 multicast addresses.
- */
-#define E1000_RAR_ENTRIES 15
-#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */
-#define E1000_RAL_MAC_ADDR_LEN 4
-#define E1000_RAH_MAC_ADDR_LEN 2
-#define E1000_RAH_QUEUE_MASK_82575 0x000C0000
-#define E1000_RAH_POOL_1 0x00040000
-
-/* Error Codes */
-#define E1000_SUCCESS 0
-#define E1000_ERR_NVM 1
-#define E1000_ERR_PHY 2
-#define E1000_ERR_CONFIG 3
-#define E1000_ERR_PARAM 4
-#define E1000_ERR_MAC_INIT 5
-#define E1000_ERR_PHY_TYPE 6
-#define E1000_ERR_RESET 9
-#define E1000_ERR_MASTER_REQUESTS_PENDING 10
-#define E1000_ERR_HOST_INTERFACE_COMMAND 11
-#define E1000_BLK_PHY_RESET 12
-#define E1000_ERR_SWFW_SYNC 13
-#define E1000_NOT_IMPLEMENTED 14
-#define E1000_ERR_MBX 15
-#define E1000_ERR_INVALID_ARGUMENT 16
-#define E1000_ERR_NO_SPACE 17
-#define E1000_ERR_NVM_PBA_SECTION 18
-#define E1000_ERR_I2C 19
-#define E1000_ERR_INVM_VALUE_NOT_FOUND 20
-
-/* Loop limit on how long we wait for auto-negotiation to complete */
-#define FIBER_LINK_UP_LIMIT 50
-#define COPPER_LINK_UP_LIMIT 10
-#define PHY_AUTO_NEG_LIMIT 45
-#define PHY_FORCE_LIMIT 20
-/* Number of 100 microseconds we wait for PCI Express master disable */
-#define MASTER_DISABLE_TIMEOUT 800
-/* Number of milliseconds we wait for PHY configuration done after MAC reset */
-#define PHY_CFG_TIMEOUT 100
-/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */
-#define MDIO_OWNERSHIP_TIMEOUT 10
-/* Number of milliseconds for NVM auto read done after MAC reset. */
-#define AUTO_READ_DONE_TIMEOUT 10
-
-/* Flow Control */
-#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */
-#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */
-#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */
-
-/* Transmit Configuration Word */
-#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */
-#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */
-#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */
-#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */
-#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */
-
-/* Receive Configuration Word */
-#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */
-#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */
-#define E1000_RXCW_C 0x20000000 /* Receive config */
-#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */
-
-#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
-#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */
-
-#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
-#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
-#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00
-#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02
-#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
-#define E1000_TSYNCRXCTL_TYPE_ALL 0x08
-#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
-#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */
-#define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */
-
-#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF
-#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00
-#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01
-#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02
-#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03
-#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04
-
-#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00
-#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000
-#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100
-#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200
-#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300
-#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800
-#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900
-#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00
-#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00
-#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00
-#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00
-
-#define E1000_TIMINCA_16NS_SHIFT 24
-#define E1000_TIMINCA_INCPERIOD_SHIFT 24
-#define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF
-
-#define E1000_TSICR_TXTS 0x00000002
-#define E1000_TSIM_TXTS 0x00000002
-/* TUPLE Filtering Configuration */
-#define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */
-#define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */
-#define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */
-/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
-#define E1000_TTQF_PROTOCOL_TCP 0x0
-/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
-#define E1000_TTQF_PROTOCOL_UDP 0x1
-/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
-#define E1000_TTQF_PROTOCOL_SCTP 0x2
-#define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */
-#define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shift */
-#define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */
-#define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */
-#define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */
-#define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */
-#define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */
-#define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */
-
-#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */
-#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */
-#define E1000_MDICNFG_PHY_MASK 0x03E00000
-#define E1000_MDICNFG_PHY_SHIFT 21
-
-#define E1000_MEDIA_PORT_COPPER 1
-#define E1000_MEDIA_PORT_OTHER 2
-#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2
-#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3
-#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */
-#define E1000_M88E1112_MAC_CTRL_1 0x10
-#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */
-#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7
-#define E1000_M88E1112_PAGE_ADDR 0x16
-#define E1000_M88E1112_STATUS 0x01
-
-#define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */
-#define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */
-#define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */
-#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */
-#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */
-
-/* I350 EEE defines */
-#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */
-#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */
-#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */
-#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */
-#define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */
-/* EEE status */
-#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */
-#define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */
-#define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */
-#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */
-#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */
-#define E1000_M88E1543_EEE_CTRL_1 0x0
-#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */
-#define E1000_EEE_ADV_DEV_I354 7
-#define E1000_EEE_ADV_ADDR_I354 60
-#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */
-#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */
-#define E1000_PCS_STATUS_DEV_I354 3
-#define E1000_PCS_STATUS_ADDR_I354 1
-#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400
-#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800
-#define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */
-#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */
-#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */
-/* PCI Express Control */
-#define E1000_GCR_RXD_NO_SNOOP 0x00000001
-#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002
-#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004
-#define E1000_GCR_TXD_NO_SNOOP 0x00000008
-#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010
-#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020
-#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000
-#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000
-#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000
-#define E1000_GCR_CAP_VER2 0x00040000
-
-#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \
- E1000_GCR_RXDSCW_NO_SNOOP | \
- E1000_GCR_RXDSCR_NO_SNOOP | \
- E1000_GCR_TXD_NO_SNOOP | \
- E1000_GCR_TXDSCW_NO_SNOOP | \
- E1000_GCR_TXDSCR_NO_SNOOP)
-
-#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */
-
-/* mPHY address control and data registers */
-#define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */
-#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000
-#define E1000_MPHY_DATA 0x0E10 /* Data Register */
-
-/* AFE CSR Offset for PCS CLK */
-#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004
-/* Override for near end digital loopback. */
-#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10
-
-/* PHY Control Register */
-#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
-#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
-#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
-#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */
-#define MII_CR_POWER_DOWN 0x0800 /* Power down */
-#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
-#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
-#define MII_CR_SPEED_1000 0x0040
-#define MII_CR_SPEED_100 0x2000
-#define MII_CR_SPEED_10 0x0000
-
-/* PHY Status Register */
-#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
-#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
-#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
-#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
-#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
-#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
-#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
-#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
-#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
-#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
-#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
-#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
-#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
-#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
-#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
-
-/* Autoneg Advertisement Register */
-#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */
-#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
-#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
-#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
-#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
-#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */
-#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */
-#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
-#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */
-#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */
-
-/* Link Partner Ability Register (Base Page) */
-#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */
-#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */
-#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */
-#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */
-#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */
-#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */
-#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */
-#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */
-#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */
-#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */
-#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */
-
-/* Autoneg Expansion Register */
-#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */
-#define NWAY_ER_PAGE_RXD 0x0002 /* New Page received from LP */
-#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* Local device Next Page able */
-#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is Next Page able */
-#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* Parallel Detection Fault */
-
-/* 1000BASE-T Control Register */
-#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */
-#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
-#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
-/* 1=Repeater/switch device port 0=DTE device */
-#define CR_1000T_REPEATER_DTE 0x0400
-/* 1=Configure PHY as Master 0=Configure PHY as Slave */
-#define CR_1000T_MS_VALUE 0x0800
-/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */
-#define CR_1000T_MS_ENABLE 0x1000
-#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
-#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */
-#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */
-#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */
-#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */
-
-/* 1000BASE-T Status Register */
-#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */
-#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */
-#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */
-#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */
-#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
-#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
-#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */
-#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */
-
-#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5
-
-/* PHY 1000 MII Register/Bit Definitions */
-/* PHY Registers defined by IEEE */
-#define PHY_CONTROL 0x00 /* Control Register */
-#define PHY_STATUS 0x01 /* Status Register */
-#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */
-#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */
-#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */
-#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */
-#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */
-#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */
-#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */
-#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */
-#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
-#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */
-
-#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */
-
-/* NVM Control */
-#define E1000_EECD_SK 0x00000001 /* NVM Clock */
-#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */
-#define E1000_EECD_DI 0x00000004 /* NVM Data In */
-#define E1000_EECD_DO 0x00000008 /* NVM Data Out */
-#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */
-#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */
-#define E1000_EECD_PRES 0x00000100 /* NVM Present */
-#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */
-#define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */
-#define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */
-#define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */
-#define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */
-/* NVM Addressing bits based on type 0=small, 1=large */
-#define E1000_EECD_ADDR_BITS 0x00000400
-#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */
-#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */
-#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */
-#define E1000_EECD_SIZE_EX_SHIFT 11
-#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */
-#define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */
-#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */
-#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES)
-#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */
-#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */
-#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */
-#define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */
-#define E1000_FLUDONE_ATTEMPTS 20000
-#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */
-#define E1000_I210_FIFO_SEL_RX 0x00
-#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i))
-#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0)
-#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06
-#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01
-
-#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */
-/* Secure FLASH mode requires removing MSb */
-#define E1000_I210_FW_PTR_MASK 0x7FFF
-/* Firmware code revision field word offset*/
-#define E1000_I210_FW_VER_OFFSET 328
-
-#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */
-#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */
-#define E1000_NVM_RW_REG_START 1 /* Start operation */
-#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
-#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */
-#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */
-#define E1000_FLASH_UPDATES 2000
-
-/* NVM Word Offsets */
-#define NVM_COMPAT 0x0003
-#define NVM_ID_LED_SETTINGS 0x0004
-#define NVM_VERSION 0x0005
-#define E1000_I210_NVM_FW_MODULE_PTR 0x0010
-#define E1000_I350_NVM_FW_MODULE_PTR 0x0051
-#define NVM_FUTURE_INIT_WORD1 0x0019
-#define NVM_ETRACK_WORD 0x0042
-#define NVM_ETRACK_HIWORD 0x0043
-#define NVM_COMB_VER_OFF 0x0083
-#define NVM_COMB_VER_PTR 0x003d
-
-/* NVM version defines */
-#define NVM_MAJOR_MASK 0xF000
-#define NVM_MINOR_MASK 0x0FF0
-#define NVM_IMAGE_ID_MASK 0x000F
-#define NVM_COMB_VER_MASK 0x00FF
-#define NVM_MAJOR_SHIFT 12
-#define NVM_MINOR_SHIFT 4
-#define NVM_COMB_VER_SHFT 8
-#define NVM_VER_INVALID 0xFFFF
-#define NVM_ETRACK_SHIFT 16
-#define NVM_ETRACK_VALID 0x8000
-#define NVM_NEW_DEC_MASK 0x0F00
-#define NVM_HEX_CONV 16
-#define NVM_HEX_TENS 10
-
-/* FW version defines */
-/* Offset of "Loader patch ptr" in Firmware Header */
-#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01
-/* Patch generation hour & minutes */
-#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04
-/* Patch generation month & day */
-#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05
-/* Patch generation year */
-#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06
-/* Patch major & minor numbers */
-#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07
-
-#define NVM_MAC_ADDR 0x0000
-#define NVM_SUB_DEV_ID 0x000B
-#define NVM_SUB_VEN_ID 0x000C
-#define NVM_DEV_ID 0x000D
-#define NVM_VEN_ID 0x000E
-#define NVM_INIT_CTRL_2 0x000F
-#define NVM_INIT_CTRL_4 0x0013
-#define NVM_LED_1_CFG 0x001C
-#define NVM_LED_0_2_CFG 0x001F
-
-#define NVM_COMPAT_VALID_CSUM 0x0001
-#define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040
-
-#define NVM_ETS_CFG 0x003E
-#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0
-#define NVM_ETS_LTHRES_DELTA_SHIFT 6
-#define NVM_ETS_TYPE_MASK 0x0038
-#define NVM_ETS_TYPE_SHIFT 3
-#define NVM_ETS_TYPE_EMC 0x000
-#define NVM_ETS_NUM_SENSORS_MASK 0x0007
-#define NVM_ETS_DATA_LOC_MASK 0x3C00
-#define NVM_ETS_DATA_LOC_SHIFT 10
-#define NVM_ETS_DATA_INDEX_MASK 0x0300
-#define NVM_ETS_DATA_INDEX_SHIFT 8
-#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF
-#define NVM_INIT_CONTROL2_REG 0x000F
-#define NVM_INIT_CONTROL3_PORT_B 0x0014
-#define NVM_INIT_3GIO_3 0x001A
-#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020
-#define NVM_INIT_CONTROL3_PORT_A 0x0024
-#define NVM_CFG 0x0012
-#define NVM_ALT_MAC_ADDR_PTR 0x0037
-#define NVM_CHECKSUM_REG 0x003F
-#define NVM_COMPATIBILITY_REG_3 0x0003
-#define NVM_COMPATIBILITY_BIT_MASK 0x8000
-
-#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */
-#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */
-#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */
-#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */
-
-#define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0)
-
-/* Mask bits for fields in Word 0x24 of the NVM */
-#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */
-#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */
-/* Offset of Link Mode bits for 82575/82576 */
-#define NVM_WORD24_LNK_MODE_OFFSET 8
-/* Offset of Link Mode bits for 82580 up */
-#define NVM_WORD24_82580_LNK_MODE_OFFSET 4
-
-
-/* Mask bits for fields in Word 0x0f of the NVM */
-#define NVM_WORD0F_PAUSE_MASK 0x3000
-#define NVM_WORD0F_PAUSE 0x1000
-#define NVM_WORD0F_ASM_DIR 0x2000
-
-/* Mask bits for fields in Word 0x1a of the NVM */
-#define NVM_WORD1A_ASPM_MASK 0x000C
-
-/* Mask bits for fields in Word 0x03 of the EEPROM */
-#define NVM_COMPAT_LOM 0x0800
-
-/* length of string needed to store PBA number */
-#define E1000_PBANUM_LENGTH 11
-
-/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
-#define NVM_SUM 0xBABA
-
-/* PBA (printed board assembly) number words */
-#define NVM_PBA_OFFSET_0 8
-#define NVM_PBA_OFFSET_1 9
-#define NVM_PBA_PTR_GUARD 0xFAFA
-#define NVM_RESERVED_WORD 0xFFFF
-#define NVM_WORD_SIZE_BASE_SHIFT 6
-
-/* NVM Commands - SPI */
-#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */
-#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */
-#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */
-#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */
-#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */
-#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */
-
-/* SPI NVM Status Register */
-#define NVM_STATUS_RDY_SPI 0x01
-
-/* Word definitions for ID LED Settings */
-#define ID_LED_RESERVED_0000 0x0000
-#define ID_LED_RESERVED_FFFF 0xFFFF
-#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \
- (ID_LED_OFF1_OFF2 << 8) | \
- (ID_LED_DEF1_DEF2 << 4) | \
- (ID_LED_DEF1_DEF2))
-#define ID_LED_DEF1_DEF2 0x1
-#define ID_LED_DEF1_ON2 0x2
-#define ID_LED_DEF1_OFF2 0x3
-#define ID_LED_ON1_DEF2 0x4
-#define ID_LED_ON1_ON2 0x5
-#define ID_LED_ON1_OFF2 0x6
-#define ID_LED_OFF1_DEF2 0x7
-#define ID_LED_OFF1_ON2 0x8
-#define ID_LED_OFF1_OFF2 0x9
-
-#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF
-#define IGP_ACTIVITY_LED_ENABLE 0x0300
-#define IGP_LED3_MODE 0x07000000
-
-/* PCI/PCI-X/PCI-EX Config space */
-#define PCI_HEADER_TYPE_REGISTER 0x0E
-#define PCIE_LINK_STATUS 0x12
-#define PCIE_DEVICE_CONTROL2 0x28
-
-#define PCI_HEADER_TYPE_MULTIFUNC 0x80
-#define PCIE_LINK_WIDTH_MASK 0x3F0
-#define PCIE_LINK_WIDTH_SHIFT 4
-#define PCIE_LINK_SPEED_MASK 0x0F
-#define PCIE_LINK_SPEED_2500 0x01
-#define PCIE_LINK_SPEED_5000 0x02
-#define PCIE_DEVICE_CONTROL2_16ms 0x0005
-
-#ifndef ETH_ADDR_LEN
-#define ETH_ADDR_LEN 6
-#endif
-
-#define PHY_REVISION_MASK 0xFFFFFFF0
-#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
-#define MAX_PHY_MULTI_PAGE_REG 0xF
-
-/* Bit definitions for valid PHY IDs.
- * I = Integrated
- * E = External
- */
-#define M88E1000_E_PHY_ID 0x01410C50
-#define M88E1000_I_PHY_ID 0x01410C30
-#define M88E1011_I_PHY_ID 0x01410C20
-#define IGP01E1000_I_PHY_ID 0x02A80380
-#define M88E1111_I_PHY_ID 0x01410CC0
-#define M88E1543_E_PHY_ID 0x01410EA0
-#define M88E1112_E_PHY_ID 0x01410C90
-#define I347AT4_E_PHY_ID 0x01410DC0
-#define M88E1340M_E_PHY_ID 0x01410DF0
-#define GG82563_E_PHY_ID 0x01410CA0
-#define IGP03E1000_E_PHY_ID 0x02A80390
-#define IFE_E_PHY_ID 0x02A80330
-#define IFE_PLUS_E_PHY_ID 0x02A80320
-#define IFE_C_E_PHY_ID 0x02A80310
-#define I82580_I_PHY_ID 0x015403A0
-#define I350_I_PHY_ID 0x015403B0
-#define I210_I_PHY_ID 0x01410C00
-#define IGP04E1000_E_PHY_ID 0x02A80391
-#define M88_VENDOR 0x0141
-
-/* M88E1000 Specific Registers */
-#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */
-#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */
-#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */
-#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */
-
-#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */
-#define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */
-
-/* M88E1000 PHY Specific Control Register */
-#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */
-/* MDI Crossover Mode bits 6:5 Manual MDI configuration */
-#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000
-#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */
-/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */
-#define M88E1000_PSCR_AUTO_X_1000T 0x0040
-/* Auto crossover enabled all speeds */
-#define M88E1000_PSCR_AUTO_X_MODE 0x0060
-#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */
-
-/* M88E1000 PHY Specific Status Register */
-#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */
-#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */
-#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */
-/* 0 = <50M
- * 1 = 50-80M
- * 2 = 80-110M
- * 3 = 110-140M
- * 4 = >140M
- */
-#define M88E1000_PSSR_CABLE_LENGTH 0x0380
-#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */
-#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */
-#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */
-#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */
-
-#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
-
-/* Number of times we will attempt to autonegotiate before downshifting if we
- * are the master
- */
-#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
-#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000
-/* Number of times we will attempt to autonegotiate before downshifting if we
- * are the slave
- */
-#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300
-#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100
-#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */
-
-/* Intel I347AT4 Registers */
-#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */
-#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */
-#define I347AT4_PAGE_SELECT 0x16
-
-/* I347AT4 Extended PHY Specific Control Register */
-
-/* Number of times we will attempt to autonegotiate before downshifting if we
- * are the master
- */
-#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800
-#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000
-#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000
-#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000
-#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000
-#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000
-#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000
-#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000
-#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000
-#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000
-
-/* I347AT4 PHY Cable Diagnostics Control */
-#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */
-
-/* M88E1112 only registers */
-#define M88E1112_VCT_DSP_DISTANCE 0x001A
-
-/* M88EC018 Rev 2 specific DownShift settings */
-#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00
-#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800
-
-/* Bits...
- * 15-5: page
- * 4-0: register offset
- */
-#define GG82563_PAGE_SHIFT 5
-#define GG82563_REG(page, reg) \
- (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS))
-#define GG82563_MIN_ALT_REG 30
-
-/* GG82563 Specific Registers */
-#define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */
-#define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */
-#define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */
-#define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */
-
-/* MAC Specific Control Register */
-#define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21)
-
-#define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */
-
-/* Page 193 - Port Control Registers */
-/* Kumeran Mode Control */
-#define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16)
-#define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */
-
-/* Page 194 - KMRN Registers */
-#define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */
-
-/* MDI Control */
-#define E1000_MDIC_REG_MASK 0x001F0000
-#define E1000_MDIC_REG_SHIFT 16
-#define E1000_MDIC_PHY_MASK 0x03E00000
-#define E1000_MDIC_PHY_SHIFT 21
-#define E1000_MDIC_OP_WRITE 0x04000000
-#define E1000_MDIC_OP_READ 0x08000000
-#define E1000_MDIC_READY 0x10000000
-#define E1000_MDIC_ERROR 0x40000000
-#define E1000_MDIC_DEST 0x80000000
-
-/* SerDes Control */
-#define E1000_GEN_CTL_READY 0x80000000
-#define E1000_GEN_CTL_ADDRESS_SHIFT 8
-#define E1000_GEN_POLL_TIMEOUT 640
-
-/* LinkSec register fields */
-#define E1000_LSECTXCAP_SUM_MASK 0x00FF0000
-#define E1000_LSECTXCAP_SUM_SHIFT 16
-#define E1000_LSECRXCAP_SUM_MASK 0x00FF0000
-#define E1000_LSECRXCAP_SUM_SHIFT 16
-
-#define E1000_LSECTXCTRL_EN_MASK 0x00000003
-#define E1000_LSECTXCTRL_DISABLE 0x0
-#define E1000_LSECTXCTRL_AUTH 0x1
-#define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2
-#define E1000_LSECTXCTRL_AISCI 0x00000020
-#define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00
-#define E1000_LSECTXCTRL_RSV_MASK 0x000000D8
-
-#define E1000_LSECRXCTRL_EN_MASK 0x0000000C
-#define E1000_LSECRXCTRL_EN_SHIFT 2
-#define E1000_LSECRXCTRL_DISABLE 0x0
-#define E1000_LSECRXCTRL_CHECK 0x1
-#define E1000_LSECRXCTRL_STRICT 0x2
-#define E1000_LSECRXCTRL_DROP 0x3
-#define E1000_LSECRXCTRL_PLSH 0x00000040
-#define E1000_LSECRXCTRL_RP 0x00000080
-#define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33
-
-/* Tx Rate-Scheduler Config fields */
-#define E1000_RTTBCNRC_RS_ENA 0x80000000
-#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF
-#define E1000_RTTBCNRC_RF_INT_SHIFT 14
-#define E1000_RTTBCNRC_RF_INT_MASK \
- (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
-
-/* DMA Coalescing register fields */
-/* DMA Coalescing Watchdog Timer */
-#define E1000_DMACR_DMACWT_MASK 0x00003FFF
-/* DMA Coalescing Rx Threshold */
-#define E1000_DMACR_DMACTHR_MASK 0x00FF0000
-#define E1000_DMACR_DMACTHR_SHIFT 16
-/* Lx when no PCIe transactions */
-#define E1000_DMACR_DMAC_LX_MASK 0x30000000
-#define E1000_DMACR_DMAC_LX_SHIFT 28
-#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */
-/* DMA Coalescing BMC-to-OS Watchdog Enable */
-#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000
-
-/* DMA Coalescing Transmit Threshold */
-#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF
-
-#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */
-
-/* Rx Traffic Rate Threshold */
-#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF
-/* Rx packet rate in current window */
-#define E1000_DMCRTRH_LRPRCW 0x80000000
-
-/* DMA Coal Rx Traffic Current Count */
-#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF
-
-/* Flow ctrl Rx Threshold High val */
-#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0
-#define E1000_FCRTC_RTH_COAL_SHIFT 4
-/* Lx power decision based on DMA coal */
-#define E1000_PCIEMISC_LX_DECISION 0x00000080
-
-#define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */
-#define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */
-#define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */
-
-/* Proxy Filter Control */
-#define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */
-#define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */
-#define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */
-#define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */
-#define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */
-#define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */
-#define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */
-#define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */
-#define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */
-/* Proxy Status */
-#define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */
-
-/* Firmware Status */
-#define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */
-/* VF Control */
-#define E1000_VTCTRL_RST 0x04000000 /* Reset VF */
-
-#define E1000_STATUS_LAN_ID_MASK 0x0000000C /* Mask for Lan ID field */
-/* Lan ID bit field offset in status register */
-#define E1000_STATUS_LAN_ID_OFFSET 2
-#define E1000_VFTA_ENTRIES 128
-#ifndef E1000_UNUSEDARG
-#define E1000_UNUSEDARG
-#endif /* E1000_UNUSEDARG */
-#endif /* _E1000_DEFINES_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
deleted file mode 100644
index ed43ef5a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
+++ /dev/null
@@ -1,778 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_HW_H_
-#define _E1000_HW_H_
-
-#include "e1000_osdep.h"
-#include "e1000_regs.h"
-#include "e1000_defines.h"
-
-struct e1000_hw;
-
-#define E1000_DEV_ID_82576 0x10C9
-#define E1000_DEV_ID_82576_FIBER 0x10E6
-#define E1000_DEV_ID_82576_SERDES 0x10E7
-#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8
-#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526
-#define E1000_DEV_ID_82576_NS 0x150A
-#define E1000_DEV_ID_82576_NS_SERDES 0x1518
-#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D
-#define E1000_DEV_ID_82575EB_COPPER 0x10A7
-#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9
-#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6
-#define E1000_DEV_ID_82580_COPPER 0x150E
-#define E1000_DEV_ID_82580_FIBER 0x150F
-#define E1000_DEV_ID_82580_SERDES 0x1510
-#define E1000_DEV_ID_82580_SGMII 0x1511
-#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516
-#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527
-#define E1000_DEV_ID_I350_COPPER 0x1521
-#define E1000_DEV_ID_I350_FIBER 0x1522
-#define E1000_DEV_ID_I350_SERDES 0x1523
-#define E1000_DEV_ID_I350_SGMII 0x1524
-#define E1000_DEV_ID_I350_DA4 0x1546
-#define E1000_DEV_ID_I210_COPPER 0x1533
-#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534
-#define E1000_DEV_ID_I210_COPPER_IT 0x1535
-#define E1000_DEV_ID_I210_FIBER 0x1536
-#define E1000_DEV_ID_I210_SERDES 0x1537
-#define E1000_DEV_ID_I210_SGMII 0x1538
-#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B
-#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C
-#define E1000_DEV_ID_I211_COPPER 0x1539
-#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40
-#define E1000_DEV_ID_I354_SGMII 0x1F41
-#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45
-#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438
-#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A
-#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C
-#define E1000_DEV_ID_DH89XXCC_SFP 0x0440
-
-#define E1000_REVISION_0 0
-#define E1000_REVISION_1 1
-#define E1000_REVISION_2 2
-#define E1000_REVISION_3 3
-#define E1000_REVISION_4 4
-
-#define E1000_FUNC_0 0
-#define E1000_FUNC_1 1
-#define E1000_FUNC_2 2
-#define E1000_FUNC_3 3
-
-#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0
-#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3
-#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6
-#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9
-
-enum e1000_mac_type {
- e1000_undefined = 0,
- e1000_82575,
- e1000_82576,
- e1000_82580,
- e1000_i350,
- e1000_i354,
- e1000_i210,
- e1000_i211,
- e1000_num_macs /* List is 1-based, so subtract 1 for true count. */
-};
-
-enum e1000_media_type {
- e1000_media_type_unknown = 0,
- e1000_media_type_copper = 1,
- e1000_media_type_fiber = 2,
- e1000_media_type_internal_serdes = 3,
- e1000_num_media_types
-};
-
-enum e1000_nvm_type {
- e1000_nvm_unknown = 0,
- e1000_nvm_none,
- e1000_nvm_eeprom_spi,
- e1000_nvm_flash_hw,
- e1000_nvm_invm,
- e1000_nvm_flash_sw
-};
-
-enum e1000_nvm_override {
- e1000_nvm_override_none = 0,
- e1000_nvm_override_spi_small,
- e1000_nvm_override_spi_large,
-};
-
-enum e1000_phy_type {
- e1000_phy_unknown = 0,
- e1000_phy_none,
- e1000_phy_m88,
- e1000_phy_igp,
- e1000_phy_igp_2,
- e1000_phy_gg82563,
- e1000_phy_igp_3,
- e1000_phy_ife,
- e1000_phy_82580,
- e1000_phy_vf,
- e1000_phy_i210,
-};
-
-enum e1000_bus_type {
- e1000_bus_type_unknown = 0,
- e1000_bus_type_pci,
- e1000_bus_type_pcix,
- e1000_bus_type_pci_express,
- e1000_bus_type_reserved
-};
-
-enum e1000_bus_speed {
- e1000_bus_speed_unknown = 0,
- e1000_bus_speed_33,
- e1000_bus_speed_66,
- e1000_bus_speed_100,
- e1000_bus_speed_120,
- e1000_bus_speed_133,
- e1000_bus_speed_2500,
- e1000_bus_speed_5000,
- e1000_bus_speed_reserved
-};
-
-enum e1000_bus_width {
- e1000_bus_width_unknown = 0,
- e1000_bus_width_pcie_x1,
- e1000_bus_width_pcie_x2,
- e1000_bus_width_pcie_x4 = 4,
- e1000_bus_width_pcie_x8 = 8,
- e1000_bus_width_32,
- e1000_bus_width_64,
- e1000_bus_width_reserved
-};
-
-enum e1000_1000t_rx_status {
- e1000_1000t_rx_status_not_ok = 0,
- e1000_1000t_rx_status_ok,
- e1000_1000t_rx_status_undefined = 0xFF
-};
-
-enum e1000_rev_polarity {
- e1000_rev_polarity_normal = 0,
- e1000_rev_polarity_reversed,
- e1000_rev_polarity_undefined = 0xFF
-};
-
-enum e1000_fc_mode {
- e1000_fc_none = 0,
- e1000_fc_rx_pause,
- e1000_fc_tx_pause,
- e1000_fc_full,
- e1000_fc_default = 0xFF
-};
-
-enum e1000_ms_type {
- e1000_ms_hw_default = 0,
- e1000_ms_force_master,
- e1000_ms_force_slave,
- e1000_ms_auto
-};
-
-enum e1000_smart_speed {
- e1000_smart_speed_default = 0,
- e1000_smart_speed_on,
- e1000_smart_speed_off
-};
-
-enum e1000_serdes_link_state {
- e1000_serdes_link_down = 0,
- e1000_serdes_link_autoneg_progress,
- e1000_serdes_link_autoneg_complete,
- e1000_serdes_link_forced_up
-};
-
-#ifndef __le16
-#define __le16 u16
-#endif
-#ifndef __le32
-#define __le32 u32
-#endif
-#ifndef __le64
-#define __le64 u64
-#endif
-/* Receive Descriptor */
-struct e1000_rx_desc {
- __le64 buffer_addr; /* Address of the descriptor's data buffer */
- __le16 length; /* Length of data DMAed into data buffer */
- __le16 csum; /* Packet checksum */
- u8 status; /* Descriptor status */
- u8 errors; /* Descriptor Errors */
- __le16 special;
-};
-
-/* Receive Descriptor - Extended */
-union e1000_rx_desc_extended {
- struct {
- __le64 buffer_addr;
- __le64 reserved;
- } read;
- struct {
- struct {
- __le32 mrq; /* Multiple Rx Queues */
- union {
- __le32 rss; /* RSS Hash */
- struct {
- __le16 ip_id; /* IP id */
- __le16 csum; /* Packet Checksum */
- } csum_ip;
- } hi_dword;
- } lower;
- struct {
- __le32 status_error; /* ext status/error */
- __le16 length;
- __le16 vlan; /* VLAN tag */
- } upper;
- } wb; /* writeback */
-};
-
-#define MAX_PS_BUFFERS 4
-
-/* Number of packet split data buffers (not including the header buffer) */
-#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1)
-
-/* Receive Descriptor - Packet Split */
-union e1000_rx_desc_packet_split {
- struct {
- /* one buffer for protocol header(s), three data buffers */
- __le64 buffer_addr[MAX_PS_BUFFERS];
- } read;
- struct {
- struct {
- __le32 mrq; /* Multiple Rx Queues */
- union {
- __le32 rss; /* RSS Hash */
- struct {
- __le16 ip_id; /* IP id */
- __le16 csum; /* Packet Checksum */
- } csum_ip;
- } hi_dword;
- } lower;
- struct {
- __le32 status_error; /* ext status/error */
- __le16 length0; /* length of buffer 0 */
- __le16 vlan; /* VLAN tag */
- } middle;
- struct {
- __le16 header_status;
- /* length of buffers 1-3 */
- __le16 length[PS_PAGE_BUFFERS];
- } upper;
- __le64 reserved;
- } wb; /* writeback */
-};
-
-/* Transmit Descriptor */
-struct e1000_tx_desc {
- __le64 buffer_addr; /* Address of the descriptor's data buffer */
- union {
- __le32 data;
- struct {
- __le16 length; /* Data buffer length */
- u8 cso; /* Checksum offset */
- u8 cmd; /* Descriptor control */
- } flags;
- } lower;
- union {
- __le32 data;
- struct {
- u8 status; /* Descriptor status */
- u8 css; /* Checksum start */
- __le16 special;
- } fields;
- } upper;
-};
-
-/* Offload Context Descriptor */
-struct e1000_context_desc {
- union {
- __le32 ip_config;
- struct {
- u8 ipcss; /* IP checksum start */
- u8 ipcso; /* IP checksum offset */
- __le16 ipcse; /* IP checksum end */
- } ip_fields;
- } lower_setup;
- union {
- __le32 tcp_config;
- struct {
- u8 tucss; /* TCP checksum start */
- u8 tucso; /* TCP checksum offset */
- __le16 tucse; /* TCP checksum end */
- } tcp_fields;
- } upper_setup;
- __le32 cmd_and_length;
- union {
- __le32 data;
- struct {
- u8 status; /* Descriptor status */
- u8 hdr_len; /* Header length */
- __le16 mss; /* Maximum segment size */
- } fields;
- } tcp_seg_setup;
-};
-
-/* Offload data descriptor */
-struct e1000_data_desc {
-	__le64 buffer_addr; /* Address of the descriptor's data buffer */
- union {
- __le32 data;
- struct {
- __le16 length; /* Data buffer length */
- u8 typ_len_ext;
- u8 cmd;
- } flags;
- } lower;
- union {
- __le32 data;
- struct {
- u8 status; /* Descriptor status */
- u8 popts; /* Packet Options */
- __le16 special;
- } fields;
- } upper;
-};
-
-/* Statistics counters collected by the MAC */
-struct e1000_hw_stats {
- u64 crcerrs;
- u64 algnerrc;
- u64 symerrs;
- u64 rxerrc;
- u64 mpc;
- u64 scc;
- u64 ecol;
- u64 mcc;
- u64 latecol;
- u64 colc;
- u64 dc;
- u64 tncrs;
- u64 sec;
- u64 cexterr;
- u64 rlec;
- u64 xonrxc;
- u64 xontxc;
- u64 xoffrxc;
- u64 xofftxc;
- u64 fcruc;
- u64 prc64;
- u64 prc127;
- u64 prc255;
- u64 prc511;
- u64 prc1023;
- u64 prc1522;
- u64 gprc;
- u64 bprc;
- u64 mprc;
- u64 gptc;
- u64 gorc;
- u64 gotc;
- u64 rnbc;
- u64 ruc;
- u64 rfc;
- u64 roc;
- u64 rjc;
- u64 mgprc;
- u64 mgpdc;
- u64 mgptc;
- u64 tor;
- u64 tot;
- u64 tpr;
- u64 tpt;
- u64 ptc64;
- u64 ptc127;
- u64 ptc255;
- u64 ptc511;
- u64 ptc1023;
- u64 ptc1522;
- u64 mptc;
- u64 bptc;
- u64 tsctc;
- u64 tsctfc;
- u64 iac;
- u64 icrxptc;
- u64 icrxatc;
- u64 ictxptc;
- u64 ictxatc;
- u64 ictxqec;
- u64 ictxqmtc;
- u64 icrxdmtc;
- u64 icrxoc;
- u64 cbtmpc;
- u64 htdpmc;
- u64 cbrdpc;
- u64 cbrmpc;
- u64 rpthc;
- u64 hgptc;
- u64 htcbdpc;
- u64 hgorc;
- u64 hgotc;
- u64 lenerrs;
- u64 scvpc;
- u64 hrmpc;
- u64 doosync;
- u64 o2bgptc;
- u64 o2bspc;
- u64 b2ospc;
- u64 b2ogprc;
-};
-
-
-struct e1000_phy_stats {
- u32 idle_errors;
- u32 receive_errors;
-};
-
-struct e1000_host_mng_dhcp_cookie {
- u32 signature;
- u8 status;
- u8 reserved0;
- u16 vlan_id;
- u32 reserved1;
- u16 reserved2;
- u8 reserved3;
- u8 checksum;
-};
-
-/* Host Interface "Rev 1" */
-struct e1000_host_command_header {
- u8 command_id;
- u8 command_length;
- u8 command_options;
- u8 checksum;
-};
-
-#define E1000_HI_MAX_DATA_LENGTH 252
-struct e1000_host_command_info {
- struct e1000_host_command_header command_header;
- u8 command_data[E1000_HI_MAX_DATA_LENGTH];
-};
-
-/* Host Interface "Rev 2" */
-struct e1000_host_mng_command_header {
- u8 command_id;
- u8 checksum;
- u16 reserved1;
- u16 reserved2;
- u16 command_length;
-};
-
-#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8
-struct e1000_host_mng_command_info {
- struct e1000_host_mng_command_header command_header;
- u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
-};
-
-#include "e1000_mac.h"
-#include "e1000_phy.h"
-#include "e1000_nvm.h"
-#include "e1000_manage.h"
-#include "e1000_mbx.h"
-
-/* Function pointers for the MAC. */
-struct e1000_mac_operations {
- s32 (*init_params)(struct e1000_hw *);
- s32 (*id_led_init)(struct e1000_hw *);
- s32 (*blink_led)(struct e1000_hw *);
- bool (*check_mng_mode)(struct e1000_hw *);
- s32 (*check_for_link)(struct e1000_hw *);
- s32 (*cleanup_led)(struct e1000_hw *);
- void (*clear_hw_cntrs)(struct e1000_hw *);
- void (*clear_vfta)(struct e1000_hw *);
- s32 (*get_bus_info)(struct e1000_hw *);
- void (*set_lan_id)(struct e1000_hw *);
- s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
- s32 (*led_on)(struct e1000_hw *);
- s32 (*led_off)(struct e1000_hw *);
- void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
- s32 (*reset_hw)(struct e1000_hw *);
- s32 (*init_hw)(struct e1000_hw *);
- void (*shutdown_serdes)(struct e1000_hw *);
- void (*power_up_serdes)(struct e1000_hw *);
- s32 (*setup_link)(struct e1000_hw *);
- s32 (*setup_physical_interface)(struct e1000_hw *);
- s32 (*setup_led)(struct e1000_hw *);
- void (*write_vfta)(struct e1000_hw *, u32, u32);
- void (*config_collision_dist)(struct e1000_hw *);
- void (*rar_set)(struct e1000_hw *, u8*, u32);
- s32 (*read_mac_addr)(struct e1000_hw *);
- s32 (*validate_mdi_setting)(struct e1000_hw *);
- s32 (*get_thermal_sensor_data)(struct e1000_hw *);
- s32 (*init_thermal_sensor_thresh)(struct e1000_hw *);
- s32 (*acquire_swfw_sync)(struct e1000_hw *, u16);
- void (*release_swfw_sync)(struct e1000_hw *, u16);
-};
-
-/* When to use various PHY register access functions:
- *
- * Func Caller
- * Function Does Does When to use
- * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * X_reg L,P,A n/a for simple PHY reg accesses
- * X_reg_locked P,A L for multiple accesses of different regs
- * on different pages
- * X_reg_page A L,P for multiple accesses of different regs
- * on the same page
- *
- * Where X=[read|write], L=locking, P=sets page, A=register access
- *
- */
-struct e1000_phy_operations {
- s32 (*init_params)(struct e1000_hw *);
- s32 (*acquire)(struct e1000_hw *);
- s32 (*check_polarity)(struct e1000_hw *);
- s32 (*check_reset_block)(struct e1000_hw *);
- s32 (*commit)(struct e1000_hw *);
- s32 (*force_speed_duplex)(struct e1000_hw *);
- s32 (*get_cfg_done)(struct e1000_hw *hw);
- s32 (*get_cable_length)(struct e1000_hw *);
- s32 (*get_info)(struct e1000_hw *);
- s32 (*set_page)(struct e1000_hw *, u16);
- s32 (*read_reg)(struct e1000_hw *, u32, u16 *);
- s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *);
- s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *);
- void (*release)(struct e1000_hw *);
- s32 (*reset)(struct e1000_hw *);
- s32 (*set_d0_lplu_state)(struct e1000_hw *, bool);
- s32 (*set_d3_lplu_state)(struct e1000_hw *, bool);
- s32 (*write_reg)(struct e1000_hw *, u32, u16);
- s32 (*write_reg_locked)(struct e1000_hw *, u32, u16);
- s32 (*write_reg_page)(struct e1000_hw *, u32, u16);
- void (*power_up)(struct e1000_hw *);
- void (*power_down)(struct e1000_hw *);
- s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *);
- s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8);
-};
-
-/* Function pointers for the NVM. */
-struct e1000_nvm_operations {
- s32 (*init_params)(struct e1000_hw *);
- s32 (*acquire)(struct e1000_hw *);
- s32 (*read)(struct e1000_hw *, u16, u16, u16 *);
- void (*release)(struct e1000_hw *);
- void (*reload)(struct e1000_hw *);
- s32 (*update)(struct e1000_hw *);
- s32 (*valid_led_default)(struct e1000_hw *, u16 *);
- s32 (*validate)(struct e1000_hw *);
- s32 (*write)(struct e1000_hw *, u16, u16, u16 *);
-};
-
-#define E1000_MAX_SENSORS 3
-
-struct e1000_thermal_diode_data {
- u8 location;
- u8 temp;
- u8 caution_thresh;
- u8 max_op_thresh;
-};
-
-struct e1000_thermal_sensor_data {
- struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS];
-};
-
-struct e1000_mac_info {
- struct e1000_mac_operations ops;
- u8 addr[ETH_ADDR_LEN];
- u8 perm_addr[ETH_ADDR_LEN];
-
- enum e1000_mac_type type;
-
- u32 collision_delta;
- u32 ledctl_default;
- u32 ledctl_mode1;
- u32 ledctl_mode2;
- u32 mc_filter_type;
- u32 tx_packet_delta;
- u32 txcw;
-
- u16 current_ifs_val;
- u16 ifs_max_val;
- u16 ifs_min_val;
- u16 ifs_ratio;
- u16 ifs_step_size;
- u16 mta_reg_count;
- u16 uta_reg_count;
-
- /* Maximum size of the MTA register table in all supported adapters */
- #define MAX_MTA_REG 128
- u32 mta_shadow[MAX_MTA_REG];
- u16 rar_entry_count;
-
- u8 forced_speed_duplex;
-
- bool adaptive_ifs;
- bool has_fwsm;
- bool arc_subsystem_valid;
- bool asf_firmware_present;
- bool autoneg;
- bool autoneg_failed;
- bool get_link_status;
- bool in_ifs_mode;
- enum e1000_serdes_link_state serdes_link_state;
- bool serdes_has_link;
- bool tx_pkt_filtering;
- struct e1000_thermal_sensor_data thermal_sensor_data;
-};
-
-struct e1000_phy_info {
- struct e1000_phy_operations ops;
- enum e1000_phy_type type;
-
- enum e1000_1000t_rx_status local_rx;
- enum e1000_1000t_rx_status remote_rx;
- enum e1000_ms_type ms_type;
- enum e1000_ms_type original_ms_type;
- enum e1000_rev_polarity cable_polarity;
- enum e1000_smart_speed smart_speed;
-
- u32 addr;
- u32 id;
- u32 reset_delay_us; /* in usec */
- u32 revision;
-
- enum e1000_media_type media_type;
-
- u16 autoneg_advertised;
- u16 autoneg_mask;
- u16 cable_length;
- u16 max_cable_length;
- u16 min_cable_length;
-
- u8 mdix;
-
- bool disable_polarity_correction;
- bool is_mdix;
- bool polarity_correction;
- bool reset_disable;
- bool speed_downgraded;
- bool autoneg_wait_to_complete;
-};
-
-struct e1000_nvm_info {
- struct e1000_nvm_operations ops;
- enum e1000_nvm_type type;
- enum e1000_nvm_override override;
-
- u32 flash_bank_size;
- u32 flash_base_addr;
-
- u16 word_size;
- u16 delay_usec;
- u16 address_bits;
- u16 opcode_bits;
- u16 page_size;
-};
-
-struct e1000_bus_info {
- enum e1000_bus_type type;
- enum e1000_bus_speed speed;
- enum e1000_bus_width width;
-
- u16 func;
- u16 pci_cmd_word;
-};
-
-struct e1000_fc_info {
- u32 high_water; /* Flow control high-water mark */
- u32 low_water; /* Flow control low-water mark */
- u16 pause_time; /* Flow control pause timer */
- u16 refresh_time; /* Flow control refresh timer */
- bool send_xon; /* Flow control send XON */
- bool strict_ieee; /* Strict IEEE mode */
- enum e1000_fc_mode current_mode; /* FC mode in effect */
- enum e1000_fc_mode requested_mode; /* FC mode requested by caller */
-};
-
-struct e1000_mbx_operations {
- s32 (*init_params)(struct e1000_hw *hw);
- s32 (*read)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*write)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*check_for_msg)(struct e1000_hw *, u16);
- s32 (*check_for_ack)(struct e1000_hw *, u16);
- s32 (*check_for_rst)(struct e1000_hw *, u16);
-};
-
-struct e1000_mbx_stats {
- u32 msgs_tx;
- u32 msgs_rx;
-
- u32 acks;
- u32 reqs;
- u32 rsts;
-};
-
-struct e1000_mbx_info {
- struct e1000_mbx_operations ops;
- struct e1000_mbx_stats stats;
- u32 timeout;
- u32 usec_delay;
- u16 size;
-};
-
-struct e1000_dev_spec_82575 {
- bool sgmii_active;
- bool global_device_reset;
- bool eee_disable;
- bool module_plugged;
- bool clear_semaphore_once;
- u32 mtu;
- struct sfp_e1000_flags eth_flags;
- u8 media_port;
- bool media_changed;
-};
-
-struct e1000_dev_spec_vf {
- u32 vf_number;
- u32 v2p_mailbox;
-};
-
-struct e1000_hw {
- void *back;
-
- u8 __iomem *hw_addr;
- u8 __iomem *flash_address;
- unsigned long io_base;
-
- struct e1000_mac_info mac;
- struct e1000_fc_info fc;
- struct e1000_phy_info phy;
- struct e1000_nvm_info nvm;
- struct e1000_bus_info bus;
- struct e1000_mbx_info mbx;
- struct e1000_host_mng_dhcp_cookie mng_cookie;
-
- union {
- struct e1000_dev_spec_82575 _82575;
- struct e1000_dev_spec_vf vf;
- } dev_spec;
-
- u16 device_id;
- u16 subsystem_vendor_id;
- u16 subsystem_device_id;
- u16 vendor_id;
-
- u8 revision_id;
-};
-
-#include "e1000_82575.h"
-#include "e1000_i210.h"
-
-/* These functions must be implemented by drivers */
-s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
-s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
-
-#endif
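The e1000_mac_operations, e1000_phy_operations and e1000_nvm_operations tables removed above all follow the same ops-table pattern: every entry is first pointed at a safe no-op handler, and a device-specific init then overrides only the entries the hardware supports. Below is a minimal standalone sketch of that pattern, not part of the driver or of this patch; all names in it (struct hw, init_ops_device, has_link_hw, ...) are illustrative only.

/*
 * Sketch of the ops-table pattern: fill the table with no-op defaults,
 * then let the device-specific init override what it actually supports.
 */
#include <stdio.h>

struct hw;                                    /* opaque device handle */
typedef int (*hw_op)(struct hw *);

struct hw_ops {
        hw_op reset;
        hw_op check_link;
};

struct hw {
        struct hw_ops ops;
        int has_link_hw;                      /* hypothetical capability flag */
};

static int null_op(struct hw *hw)    { (void)hw; return 0; }  /* like e1000_null_ops_generic */
static int real_reset(struct hw *hw) { (void)hw; printf("reset\n"); return 0; }
static int real_link(struct hw *hw)  { (void)hw; printf("link check\n"); return 0; }

static void init_ops_generic(struct hw *hw)   /* generic layer: all no-ops */
{
        hw->ops.reset = null_op;
        hw->ops.check_link = null_op;
}

static void init_ops_device(struct hw *hw)    /* device layer: selective overrides */
{
        init_ops_generic(hw);
        hw->ops.reset = real_reset;
        if (hw->has_link_hw)
                hw->ops.check_link = real_link;
}

int main(void)
{
        struct hw hw = { .has_link_hw = 1 };

        init_ops_device(&hw);
        hw.ops.reset(&hw);                    /* callers always dispatch through ops */
        hw.ops.check_link(&hw);
        return 0;
}

Because callers always go through the table, operations a given device does not implement degrade to harmless no-ops instead of NULL dereferences.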
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
deleted file mode 100644
index a4fabc3a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
+++ /dev/null
@@ -1,894 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-
-static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw);
-static void e1000_release_nvm_i210(struct e1000_hw *hw);
-static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw);
-static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data);
-static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw);
-static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data);
-
-/**
- * e1000_acquire_nvm_i210 - Request for access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Acquire the necessary semaphores for exclusive access to the EEPROM.
- * Set the EEPROM access request bit and wait for EEPROM access grant bit.
- * Return successful if access grant bit set, else clear the request for
- * EEPROM access and return -E1000_ERR_NVM (-1).
- **/
-static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_acquire_nvm_i210");
-
- ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
-
- return ret_val;
-}
-
-/**
- * e1000_release_nvm_i210 - Release exclusive access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Stop any current commands to the EEPROM and clear the EEPROM request bit,
- * then release the semaphores acquired.
- **/
-static void e1000_release_nvm_i210(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_release_nvm_i210");
-
- e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
-}
-
-/**
- * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
- * will also specify which port we're acquiring the lock for.
- **/
-s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
-{
- u32 swfw_sync;
- u32 swmask = mask;
- u32 fwmask = mask << 16;
- s32 ret_val = E1000_SUCCESS;
- s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
-
- DEBUGFUNC("e1000_acquire_swfw_sync_i210");
-
- while (i < timeout) {
- if (e1000_get_hw_semaphore_i210(hw)) {
- ret_val = -E1000_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
- if (!(swfw_sync & (fwmask | swmask)))
- break;
-
- /*
- * Firmware currently using resource (fwmask)
- * or other software thread using resource (swmask)
- */
- e1000_put_hw_semaphore_generic(hw);
- msec_delay_irq(5);
- i++;
- }
-
- if (i == timeout) {
- DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
- ret_val = -E1000_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync |= swmask;
- E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
- e1000_put_hw_semaphore_generic(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_release_swfw_sync_i210 - Release SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Release the SW/FW semaphore used to access the PHY or NVM. The mask
- * will also specify which port we're releasing the lock for.
- **/
-void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
-{
- u32 swfw_sync;
-
- DEBUGFUNC("e1000_release_swfw_sync_i210");
-
- while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS)
- ; /* Empty */
-
- swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
- swfw_sync &= ~mask;
- E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
- e1000_put_hw_semaphore_generic(hw);
-}
-
-/**
- * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore
- * @hw: pointer to the HW structure
- *
- * Acquire the HW semaphore to access the PHY or NVM
- **/
-static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw)
-{
- u32 swsm;
- s32 timeout = hw->nvm.word_size + 1;
- s32 i = 0;
-
- DEBUGFUNC("e1000_get_hw_semaphore_i210");
-
- /* Get the SW semaphore */
- while (i < timeout) {
- swsm = E1000_READ_REG(hw, E1000_SWSM);
- if (!(swsm & E1000_SWSM_SMBI))
- break;
-
- usec_delay(50);
- i++;
- }
-
- if (i == timeout) {
- /* In rare circumstances, the SW semaphore may already be held
- * unintentionally. Clear the semaphore once before giving up.
- */
- if (hw->dev_spec._82575.clear_semaphore_once) {
- hw->dev_spec._82575.clear_semaphore_once = false;
- e1000_put_hw_semaphore_generic(hw);
- for (i = 0; i < timeout; i++) {
- swsm = E1000_READ_REG(hw, E1000_SWSM);
- if (!(swsm & E1000_SWSM_SMBI))
- break;
-
- usec_delay(50);
- }
- }
-
- /* If we do not have the semaphore here, we have to give up. */
- if (i == timeout) {
- DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
- return -E1000_ERR_NVM;
- }
- }
-
- /* Get the FW semaphore. */
- for (i = 0; i < timeout; i++) {
- swsm = E1000_READ_REG(hw, E1000_SWSM);
- E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
-
- /* Semaphore acquired if bit latched */
- if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
- break;
-
- usec_delay(50);
- }
-
- if (i == timeout) {
- /* Release semaphores */
- e1000_put_hw_semaphore_generic(hw);
- DEBUGOUT("Driver can't access the NVM\n");
- return -E1000_ERR_NVM;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register
- * @hw: pointer to the HW structure
- * @offset: offset of word in the Shadow Ram to read
- * @words: number of words to read
- * @data: word read from the Shadow Ram
- *
- * Reads a 16 bit word from the Shadow Ram using the EERD register.
- * Uses necessary synchronization semaphores.
- **/
-s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data)
-{
- s32 status = E1000_SUCCESS;
- u16 i, count;
-
- DEBUGFUNC("e1000_read_nvm_srrd_i210");
-
-	/* We cannot hold the synchronization semaphores for too long,
-	 * because of the forceful takeover procedure. However, it is more
-	 * efficient to read in bursts than to synchronize access for each word. */
- for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
- count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
- E1000_EERD_EEWR_MAX_COUNT : (words - i);
- if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
- status = e1000_read_nvm_eerd(hw, offset, count,
- data + i);
- hw->nvm.ops.release(hw);
- } else {
- status = E1000_ERR_SWFW_SYNC;
- }
-
- if (status != E1000_SUCCESS)
- break;
- }
-
- return status;
-}
-
-/**
- * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR
- * @hw: pointer to the HW structure
- * @offset: offset within the Shadow RAM to be written to
- * @words: number of words to write
- * @data: 16 bit word(s) to be written to the Shadow RAM
- *
- * Writes data to Shadow RAM at offset using EEWR register.
- *
- * If e1000_update_nvm_checksum is not called after this function, the
- * data will not be committed to FLASH and also Shadow RAM will most likely
- * contain an invalid checksum.
- *
- * If error code is returned, data and Shadow RAM may be inconsistent - buffer
- * partially written.
- **/
-s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data)
-{
- s32 status = E1000_SUCCESS;
- u16 i, count;
-
- DEBUGFUNC("e1000_write_nvm_srwr_i210");
-
-	/* We cannot hold the synchronization semaphores for too long,
-	 * because of the forceful takeover procedure. However, it is more
-	 * efficient to write in bursts than to synchronize access for each word. */
- for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
- count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
- E1000_EERD_EEWR_MAX_COUNT : (words - i);
- if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
- status = e1000_write_nvm_srwr(hw, offset, count,
- data + i);
- hw->nvm.ops.release(hw);
- } else {
- status = E1000_ERR_SWFW_SYNC;
- }
-
- if (status != E1000_SUCCESS)
- break;
- }
-
- return status;
-}
-
-/**
- * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR
- * @hw: pointer to the HW structure
- * @offset: offset within the Shadow Ram to be written to
- * @words: number of words to write
- * @data: 16 bit word(s) to be written to the Shadow Ram
- *
- * Writes data to Shadow Ram at offset using EEWR register.
- *
- * If e1000_update_nvm_checksum is not called after this function, the
- * Shadow Ram will most likely contain an invalid checksum.
- **/
-static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 i, k, eewr = 0;
- u32 attempts = 100000;
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_write_nvm_srwr");
-
- /*
- * A check for invalid values: offset too large, too many words,
- * too many words for the offset, and not enough words.
- */
- if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
- (words == 0)) {
- DEBUGOUT("nvm parameter(s) out of bounds\n");
- ret_val = -E1000_ERR_NVM;
- goto out;
- }
-
- for (i = 0; i < words; i++) {
- eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) |
- (data[i] << E1000_NVM_RW_REG_DATA) |
- E1000_NVM_RW_REG_START;
-
- E1000_WRITE_REG(hw, E1000_SRWR, eewr);
-
- for (k = 0; k < attempts; k++) {
- if (E1000_NVM_RW_REG_DONE &
- E1000_READ_REG(hw, E1000_SRWR)) {
- ret_val = E1000_SUCCESS;
- break;
- }
- usec_delay(5);
- }
-
- if (ret_val != E1000_SUCCESS) {
- DEBUGOUT("Shadow RAM write EEWR timed out\n");
- break;
- }
- }
-
-out:
- return ret_val;
-}
-
-/** e1000_read_invm_word_i210 - Reads OTP
- * @hw: pointer to the HW structure
- * @address: the word address (aka eeprom offset) to read
- * @data: pointer to the data read
- *
- * Reads 16-bit words from the OTP. Return error when the word is not
- * stored in OTP.
- **/
-static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data)
-{
- s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
- u32 invm_dword;
- u16 i;
- u8 record_type, word_address;
-
- DEBUGFUNC("e1000_read_invm_word_i210");
-
- for (i = 0; i < E1000_INVM_SIZE; i++) {
- invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
- /* Get record type */
- record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
- if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
- break;
- if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
- i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
- if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
- i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
- if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
- word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
- if (word_address == address) {
- *data = INVM_DWORD_TO_WORD_DATA(invm_dword);
- DEBUGOUT2("Read INVM Word 0x%02x = %x",
- address, *data);
- status = E1000_SUCCESS;
- break;
- }
- }
- }
- if (status != E1000_SUCCESS)
- DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address);
- return status;
-}
-
-/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211
- * @hw: pointer to the HW structure
- * @offset: the word address (aka eeprom offset) to read
- * @words: number of 16 bit words to read (unused)
- * @data: pointer to the data read
- *
- * Wrapper function to return data formerly found in the NVM.
- **/
-static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset,
- u16 E1000_UNUSEDARG words, u16 *data)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_read_invm_i210");
-
- /* Only the MAC addr is required to be present in the iNVM */
- switch (offset) {
- case NVM_MAC_ADDR:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]);
- ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1,
- &data[1]);
- ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2,
- &data[2]);
- if (ret_val != E1000_SUCCESS)
- DEBUGOUT("MAC Addr not found in iNVM\n");
- break;
- case NVM_INIT_CTRL_2:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
- if (ret_val != E1000_SUCCESS) {
- *data = NVM_INIT_CTRL_2_DEFAULT_I211;
- ret_val = E1000_SUCCESS;
- }
- break;
- case NVM_INIT_CTRL_4:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
- if (ret_val != E1000_SUCCESS) {
- *data = NVM_INIT_CTRL_4_DEFAULT_I211;
- ret_val = E1000_SUCCESS;
- }
- break;
- case NVM_LED_1_CFG:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
- if (ret_val != E1000_SUCCESS) {
- *data = NVM_LED_1_CFG_DEFAULT_I211;
- ret_val = E1000_SUCCESS;
- }
- break;
- case NVM_LED_0_2_CFG:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
- if (ret_val != E1000_SUCCESS) {
- *data = NVM_LED_0_2_CFG_DEFAULT_I211;
- ret_val = E1000_SUCCESS;
- }
- break;
- case NVM_ID_LED_SETTINGS:
- ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
- if (ret_val != E1000_SUCCESS) {
- *data = ID_LED_RESERVED_FFFF;
- ret_val = E1000_SUCCESS;
- }
- break;
- case NVM_SUB_DEV_ID:
- *data = hw->subsystem_device_id;
- break;
- case NVM_SUB_VEN_ID:
- *data = hw->subsystem_vendor_id;
- break;
- case NVM_DEV_ID:
- *data = hw->device_id;
- break;
- case NVM_VEN_ID:
- *data = hw->vendor_id;
- break;
- default:
- DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset);
- *data = NVM_RESERVED_WORD;
- break;
- }
- return ret_val;
-}
-
-/**
- * e1000_read_invm_version - Reads iNVM version and image type
- * @hw: pointer to the HW structure
- * @invm_ver: version structure for the version read
- *
- * Reads iNVM version and image type.
- **/
-s32 e1000_read_invm_version(struct e1000_hw *hw,
- struct e1000_fw_version *invm_ver)
-{
- u32 *record = NULL;
- u32 *next_record = NULL;
- u32 i = 0;
- u32 invm_dword = 0;
- u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE /
- E1000_INVM_RECORD_SIZE_IN_BYTES);
- u32 buffer[E1000_INVM_SIZE];
- s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
- u16 version = 0;
-
- DEBUGFUNC("e1000_read_invm_version");
-
- /* Read iNVM memory */
- for (i = 0; i < E1000_INVM_SIZE; i++) {
- invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
- buffer[i] = invm_dword;
- }
-
- /* Read version number */
- for (i = 1; i < invm_blocks; i++) {
- record = &buffer[invm_blocks - i];
- next_record = &buffer[invm_blocks - i + 1];
-
- /* Check if we have first version location used */
- if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) {
- version = 0;
- status = E1000_SUCCESS;
- break;
- }
- /* Check if we have second version location used */
- else if ((i == 1) &&
- ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
- status = E1000_SUCCESS;
- break;
- }
- /*
- * Check if we have odd version location
- * used and it is the last one used
- */
- else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
- ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
- (i != 1))) {
- version = (*next_record & E1000_INVM_VER_FIELD_TWO)
- >> 13;
- status = E1000_SUCCESS;
- break;
- }
- /*
- * Check if we have even version location
- * used and it is the last one used
- */
- else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
- ((*record & 0x3) == 0)) {
- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
- status = E1000_SUCCESS;
- break;
- }
- }
-
- if (status == E1000_SUCCESS) {
- invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
- >> E1000_INVM_MAJOR_SHIFT;
- invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
- }
- /* Read Image Type */
- for (i = 1; i < invm_blocks; i++) {
- record = &buffer[invm_blocks - i];
- next_record = &buffer[invm_blocks - i + 1];
-
- /* Check if we have image type in first location used */
- if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) {
- invm_ver->invm_img_type = 0;
- status = E1000_SUCCESS;
- break;
- }
-		/* Check if we have image type in a later location used */
- else if ((((*record & 0x3) == 0) &&
- ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
- ((((*record & 0x3) != 0) && (i != 1)))) {
- invm_ver->invm_img_type =
- (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
- status = E1000_SUCCESS;
- break;
- }
- }
- return status;
-}
-
-/**
- * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
- * and then verifies that the sum of the EEPROM is equal to 0xBABA.
- **/
-s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw)
-{
- s32 status = E1000_SUCCESS;
- s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *);
-
- DEBUGFUNC("e1000_validate_nvm_checksum_i210");
-
- if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
-
- /*
-		 * Temporarily replace the semaphore-grabbing read function with
-		 * one that skips it, since we already hold the semaphore here.
- */
- read_op_ptr = hw->nvm.ops.read;
- hw->nvm.ops.read = e1000_read_nvm_eerd;
-
- status = e1000_validate_nvm_checksum_generic(hw);
-
- /* Revert original read operation. */
- hw->nvm.ops.read = read_op_ptr;
-
- hw->nvm.ops.release(hw);
- } else {
- status = E1000_ERR_SWFW_SYNC;
- }
-
- return status;
-}
-
-
-/**
- * e1000_update_nvm_checksum_i210 - Update EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Updates the EEPROM checksum by reading/adding each word of the EEPROM
- * up to the checksum. Then calculates the EEPROM checksum and writes the
- * value to the EEPROM. Finally, the EEPROM data is committed to the flash.
- **/
-s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 checksum = 0;
- u16 i, nvm_data;
-
- DEBUGFUNC("e1000_update_nvm_checksum_i210");
-
- /*
- * Read the first word from the EEPROM. If this times out or fails, do
- * not continue or we could be in for a very long wait while every
- * EEPROM read fails
- */
- ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data);
- if (ret_val != E1000_SUCCESS) {
- DEBUGOUT("EEPROM read failed\n");
- goto out;
- }
-
- if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
- /*
- * Do not use hw->nvm.ops.write, hw->nvm.ops.read
- * because we do not want to take the synchronization
- * semaphores twice here.
- */
-
- for (i = 0; i < NVM_CHECKSUM_REG; i++) {
- ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data);
- if (ret_val) {
- hw->nvm.ops.release(hw);
- DEBUGOUT("NVM Read Error while updating checksum.\n");
- goto out;
- }
- checksum += nvm_data;
- }
- checksum = (u16) NVM_SUM - checksum;
- ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
- &checksum);
- if (ret_val != E1000_SUCCESS) {
- hw->nvm.ops.release(hw);
- DEBUGOUT("NVM Write Error while updating checksum.\n");
- goto out;
- }
-
- hw->nvm.ops.release(hw);
-
- ret_val = e1000_update_flash_i210(hw);
- } else {
- ret_val = E1000_ERR_SWFW_SYNC;
- }
-out:
- return ret_val;
-}
-
-/**
- * e1000_get_flash_presence_i210 - Check if flash device is detected.
- * @hw: pointer to the HW structure
- *
- **/
-bool e1000_get_flash_presence_i210(struct e1000_hw *hw)
-{
- u32 eec = 0;
- bool ret_val = false;
-
- DEBUGFUNC("e1000_get_flash_presence_i210");
-
- eec = E1000_READ_REG(hw, E1000_EECD);
-
- if (eec & E1000_EECD_FLASH_DETECTED_I210)
- ret_val = true;
-
- return ret_val;
-}
-
-/**
- * e1000_update_flash_i210 - Commit EEPROM to the flash
- * @hw: pointer to the HW structure
- *
- **/
-s32 e1000_update_flash_i210(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u32 flup;
-
- DEBUGFUNC("e1000_update_flash_i210");
-
- ret_val = e1000_pool_flash_update_done_i210(hw);
- if (ret_val == -E1000_ERR_NVM) {
- DEBUGOUT("Flash update time out\n");
- goto out;
- }
-
- flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210;
- E1000_WRITE_REG(hw, E1000_EECD, flup);
-
- ret_val = e1000_pool_flash_update_done_i210(hw);
- if (ret_val == E1000_SUCCESS)
- DEBUGOUT("Flash update complete\n");
- else
- DEBUGOUT("Flash update time out\n");
-
-out:
- return ret_val;
-}
-
-/**
- * e1000_pool_flash_update_done_i210 - Poll FLUDONE status.
- * @hw: pointer to the HW structure
- *
- **/
-s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw)
-{
- s32 ret_val = -E1000_ERR_NVM;
- u32 i, reg;
-
- DEBUGFUNC("e1000_pool_flash_update_done_i210");
-
- for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) {
- reg = E1000_READ_REG(hw, E1000_EECD);
- if (reg & E1000_EECD_FLUDONE_I210) {
- ret_val = E1000_SUCCESS;
- break;
- }
- usec_delay(5);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers
- * @hw: pointer to the HW structure
- *
- * Initialize the i210/i211 NVM parameters and function pointers.
- **/
-static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- struct e1000_nvm_info *nvm = &hw->nvm;
-
- DEBUGFUNC("e1000_init_nvm_params_i210");
-
- ret_val = e1000_init_nvm_params_82575(hw);
- nvm->ops.acquire = e1000_acquire_nvm_i210;
- nvm->ops.release = e1000_release_nvm_i210;
- nvm->ops.valid_led_default = e1000_valid_led_default_i210;
- if (e1000_get_flash_presence_i210(hw)) {
- hw->nvm.type = e1000_nvm_flash_hw;
- nvm->ops.read = e1000_read_nvm_srrd_i210;
- nvm->ops.write = e1000_write_nvm_srwr_i210;
- nvm->ops.validate = e1000_validate_nvm_checksum_i210;
- nvm->ops.update = e1000_update_nvm_checksum_i210;
- } else {
- hw->nvm.type = e1000_nvm_invm;
- nvm->ops.read = e1000_read_invm_i210;
- nvm->ops.write = e1000_null_write_nvm;
- nvm->ops.validate = e1000_null_ops_generic;
- nvm->ops.update = e1000_null_ops_generic;
- }
- return ret_val;
-}
-
-/**
- * e1000_init_function_pointers_i210 - Init func ptrs.
- * @hw: pointer to the HW structure
- *
- * Called to initialize all function pointers and parameters.
- **/
-void e1000_init_function_pointers_i210(struct e1000_hw *hw)
-{
- e1000_init_function_pointers_82575(hw);
- hw->nvm.ops.init_params = e1000_init_nvm_params_i210;
-
- return;
-}
-
-/**
- * e1000_valid_led_default_i210 - Verify a valid default LED config
- * @hw: pointer to the HW structure
- * @data: pointer to the NVM (EEPROM)
- *
- * Read the EEPROM for the current default LED configuration. If the
- * LED configuration is not valid, set to a valid LED configuration.
- **/
-static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_valid_led_default_i210");
-
- ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- goto out;
- }
-
- if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
- switch (hw->phy.media_type) {
- case e1000_media_type_internal_serdes:
- *data = ID_LED_DEFAULT_I210_SERDES;
- break;
- case e1000_media_type_copper:
- default:
- *data = ID_LED_DEFAULT_I210;
- break;
- }
- }
-out:
- return ret_val;
-}
-
-/**
- * __e1000_access_xmdio_reg - Read/write XMDIO register
- * @hw: pointer to the HW structure
- * @address: XMDIO address to program
- * @dev_addr: device address to program
- * @data: pointer to value to read/write from/to the XMDIO address
- * @read: boolean flag to indicate read or write
- **/
-static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address,
- u8 dev_addr, u16 *data, bool read)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("__e1000_access_xmdio_reg");
-
- ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr);
- if (ret_val)
- return ret_val;
-
- ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address);
- if (ret_val)
- return ret_val;
-
- ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA |
- dev_addr);
- if (ret_val)
- return ret_val;
-
- if (read)
- ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data);
- else
- ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data);
- if (ret_val)
- return ret_val;
-
- /* Recalibrate the device back to 0 */
- ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0);
- if (ret_val)
- return ret_val;
-
- return ret_val;
-}
-
-/**
- * e1000_read_xmdio_reg - Read XMDIO register
- * @hw: pointer to the HW structure
- * @addr: XMDIO address to program
- * @dev_addr: device address to program
- * @data: value to be read from the EMI address
- **/
-s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data)
-{
- DEBUGFUNC("e1000_read_xmdio_reg");
-
- return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true);
-}
-
-/**
- * e1000_write_xmdio_reg - Write XMDIO register
- * @hw: pointer to the HW structure
- * @addr: XMDIO address to program
- * @dev_addr: device address to program
- * @data: value to be written to the XMDIO address
- **/
-s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data)
-{
-	DEBUGFUNC("e1000_write_xmdio_reg");
-
- return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false);
-}
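e1000_read_nvm_srrd_i210() and e1000_write_nvm_srwr_i210() above split large Shadow RAM transfers into bursts of at most E1000_EERD_EEWR_MAX_COUNT words, acquiring and releasing the SW/FW semaphore around each burst so firmware can take the resource over in between. A standalone sketch of that access pattern, not part of the driver or of this patch and with purely illustrative names (acquire, read_words, MAX_BURST), might look like this:

/*
 * Sketch: bounded bursts under a shared lock. Each burst re-acquires the
 * lock, transfers up to MAX_BURST words, then releases the lock so another
 * agent can grab the resource between bursts.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_BURST 512                 /* plays the role of E1000_EERD_EEWR_MAX_COUNT */

static int acquire(void) { return 0; }        /* stubbed lock; 0 == success */
static void release(void) { }

static int read_words(uint16_t off, uint16_t n, uint16_t *dst)
{
        for (uint16_t i = 0; i < n; i++)
                dst[i] = (uint16_t)(off + i);  /* stand-in for per-word EERD reads */
        return 0;
}

static int read_nvm_bursts(uint16_t offset, uint16_t words, uint16_t *data)
{
        for (uint16_t i = 0; i < words; i += MAX_BURST) {
                uint16_t count = (uint16_t)((words - i) > MAX_BURST ?
                                            MAX_BURST : (words - i));
                if (acquire() != 0)
                        return -1;             /* like E1000_ERR_SWFW_SYNC */
                int status = read_words((uint16_t)(offset + i), count, data + i);
                release();                     /* give other agents a chance */
                if (status != 0)
                        return status;
        }
        return 0;
}

int main(void)
{
        uint16_t buf[1024];

        if (read_nvm_bursts(0, 1024, buf) == 0)
                printf("buf[1023] = %u\n", buf[1023]);
        return 0;
}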
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
deleted file mode 100644
index 9df7c203..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_I210_H_
-#define _E1000_I210_H_
-
-bool e1000_get_flash_presence_i210(struct e1000_hw *hw);
-s32 e1000_update_flash_i210(struct e1000_hw *hw);
-s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw);
-s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw);
-s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 e1000_read_invm_version(struct e1000_hw *hw,
- struct e1000_fw_version *invm_ver);
-s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
-void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
-s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
- u16 *data);
-s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
- u16 data);
-
-#define E1000_STM_OPCODE 0xDB00
-#define E1000_EEPROM_FLASH_SIZE_WORD 0x11
-
-#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \
- (u8)((invm_dword) & 0x7)
-#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \
- (u8)(((invm_dword) & 0x0000FE00) >> 9)
-#define INVM_DWORD_TO_WORD_DATA(invm_dword) \
- (u16)(((invm_dword) & 0xFFFF0000) >> 16)
-
-enum E1000_INVM_STRUCTURE_TYPE {
- E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00,
- E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01,
- E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02,
- E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03,
- E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04,
- E1000_INVM_INVALIDATED_STRUCTURE = 0x0F,
-};
-
-#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8
-#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1
-#define E1000_INVM_ULT_BYTES_SIZE 8
-#define E1000_INVM_RECORD_SIZE_IN_BYTES 4
-#define E1000_INVM_VER_FIELD_ONE 0x1FF8
-#define E1000_INVM_VER_FIELD_TWO 0x7FE000
-#define E1000_INVM_IMGTYPE_FIELD 0x1F800000
-
-#define E1000_INVM_MAJOR_MASK 0x3F0
-#define E1000_INVM_MINOR_MASK 0xF
-#define E1000_INVM_MAJOR_SHIFT 4
-
-#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \
- (ID_LED_DEF1_DEF2 << 4) | \
- (ID_LED_OFF1_OFF2))
-#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \
- (ID_LED_DEF1_DEF2 << 4) | \
- (ID_LED_OFF1_ON2))
-
-/* NVM offset defaults for I211 devices */
-#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243
-#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1
-#define NVM_LED_1_CFG_DEFAULT_I211 0x0184
-#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C
-#endif
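The INVM_DWORD_TO_* macros removed above unpack a 32-bit iNVM word-autoload record: bits 2:0 carry the record type, bits 15:9 the word address and bits 31:16 the word data. A small standalone check, not part of the driver or of this patch (the sample dword is made up), shows the decoding:

#include <stdio.h>
#include <stdint.h>

#define INVM_DWORD_TO_RECORD_TYPE(d)  ((uint8_t)((d) & 0x7))
#define INVM_DWORD_TO_WORD_ADDRESS(d) ((uint8_t)(((d) & 0x0000FE00) >> 9))
#define INVM_DWORD_TO_WORD_DATA(d)    ((uint16_t)(((d) & 0xFFFF0000) >> 16))

int main(void)
{
        /* made-up record: type 1 (word autoload), address 0x12, data 0xBEEF */
        uint32_t dword = 0xBEEF0000u | (0x12u << 9) | 0x01u;

        printf("type=%u addr=0x%02X data=0x%04X\n",
               (unsigned)INVM_DWORD_TO_RECORD_TYPE(dword),
               (unsigned)INVM_DWORD_TO_WORD_ADDRESS(dword),
               (unsigned)INVM_DWORD_TO_WORD_DATA(dword));
        return 0;
}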
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
deleted file mode 100644
index 13a42267..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
+++ /dev/null
@@ -1,2081 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw);
-static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
-static void e1000_config_collision_dist_generic(struct e1000_hw *hw);
-static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
-
-/**
- * e1000_init_mac_ops_generic - Initialize MAC function pointers
- * @hw: pointer to the HW structure
- *
- * Sets up the function pointers to no-op functions
- **/
-void e1000_init_mac_ops_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- DEBUGFUNC("e1000_init_mac_ops_generic");
-
- /* General Setup */
- mac->ops.init_params = e1000_null_ops_generic;
- mac->ops.init_hw = e1000_null_ops_generic;
- mac->ops.reset_hw = e1000_null_ops_generic;
- mac->ops.setup_physical_interface = e1000_null_ops_generic;
- mac->ops.get_bus_info = e1000_null_ops_generic;
- mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie;
- mac->ops.read_mac_addr = e1000_read_mac_addr_generic;
- mac->ops.config_collision_dist = e1000_config_collision_dist_generic;
- mac->ops.clear_hw_cntrs = e1000_null_mac_generic;
- /* LED */
- mac->ops.cleanup_led = e1000_null_ops_generic;
- mac->ops.setup_led = e1000_null_ops_generic;
- mac->ops.blink_led = e1000_null_ops_generic;
- mac->ops.led_on = e1000_null_ops_generic;
- mac->ops.led_off = e1000_null_ops_generic;
- /* LINK */
- mac->ops.setup_link = e1000_null_ops_generic;
- mac->ops.get_link_up_info = e1000_null_link_info;
- mac->ops.check_for_link = e1000_null_ops_generic;
- /* Management */
- mac->ops.check_mng_mode = e1000_null_mng_mode;
- /* VLAN, MC, etc. */
- mac->ops.update_mc_addr_list = e1000_null_update_mc;
- mac->ops.clear_vfta = e1000_null_mac_generic;
- mac->ops.write_vfta = e1000_null_write_vfta;
- mac->ops.rar_set = e1000_rar_set_generic;
- mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic;
-}
-
-/**
- * e1000_null_ops_generic - No-op function, returns 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_null_ops_generic");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_mac_generic - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_null_mac_generic");
- return;
-}
-
-/**
- * e1000_null_link_info - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d)
-{
- DEBUGFUNC("e1000_null_link_info");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_mng_mode - No-op function, return false
- * @hw: pointer to the HW structure
- **/
-bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_null_mng_mode");
- return false;
-}
-
-/**
- * e1000_null_update_mc - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw,
- u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
-{
- DEBUGFUNC("e1000_null_update_mc");
- return;
-}
-
-/**
- * e1000_null_write_vfta - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw,
- u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b)
-{
- DEBUGFUNC("e1000_null_write_vfta");
- return;
-}
-
-/**
- * e1000_null_rar_set - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw,
- u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
-{
- DEBUGFUNC("e1000_null_rar_set");
- return;
-}
-
-/**
- * e1000_get_bus_info_pcie_generic - Get PCIe bus information
- * @hw: pointer to the HW structure
- *
- * Determines and stores the system bus information for a particular
- * network interface. The following bus information is determined and stored:
- * bus speed, bus width, type (PCIe), and PCIe function.
- **/
-s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- struct e1000_bus_info *bus = &hw->bus;
- s32 ret_val;
- u16 pcie_link_status;
-
- DEBUGFUNC("e1000_get_bus_info_pcie_generic");
-
- bus->type = e1000_bus_type_pci_express;
-
- ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS,
- &pcie_link_status);
- if (ret_val) {
- bus->width = e1000_bus_width_unknown;
- bus->speed = e1000_bus_speed_unknown;
- } else {
- switch (pcie_link_status & PCIE_LINK_SPEED_MASK) {
- case PCIE_LINK_SPEED_2500:
- bus->speed = e1000_bus_speed_2500;
- break;
- case PCIE_LINK_SPEED_5000:
- bus->speed = e1000_bus_speed_5000;
- break;
- default:
- bus->speed = e1000_bus_speed_unknown;
- break;
- }
-
- bus->width = (enum e1000_bus_width)((pcie_link_status &
- PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT);
- }
-
- mac->ops.set_lan_id(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
- *
- * @hw: pointer to the HW structure
- *
- * Determines the LAN function id by reading memory-mapped registers
- * and swaps the port value if requested.
- **/
-static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
-{
- struct e1000_bus_info *bus = &hw->bus;
- u32 reg;
-
- /* The status register reports the correct function number
- * for the device regardless of function swap state.
- */
- reg = E1000_READ_REG(hw, E1000_STATUS);
- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
-}
-
-/**
- * e1000_set_lan_id_single_port - Set LAN id for a single port device
- * @hw: pointer to the HW structure
- *
- * Sets the LAN function id to zero for a single port device.
- **/
-void e1000_set_lan_id_single_port(struct e1000_hw *hw)
-{
- struct e1000_bus_info *bus = &hw->bus;
-
- bus->func = 0;
-}
-
-/**
- * e1000_clear_vfta_generic - Clear VLAN filter table
- * @hw: pointer to the HW structure
- *
- * Clears the register array which contains the VLAN filter table by
- * setting all the values to 0.
- **/
-void e1000_clear_vfta_generic(struct e1000_hw *hw)
-{
- u32 offset;
-
- DEBUGFUNC("e1000_clear_vfta_generic");
-
- for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
- E1000_WRITE_FLUSH(hw);
- }
-}
-
-/**
- * e1000_write_vfta_generic - Write value to VLAN filter table
- * @hw: pointer to the HW structure
- * @offset: register offset in VLAN filter table
- * @value: register value written to VLAN filter table
- *
- * Writes value at the given offset in the register array which stores
- * the VLAN filter table.
- **/
-void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value)
-{
- DEBUGFUNC("e1000_write_vfta_generic");
-
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_init_rx_addrs_generic - Initialize receive addresses
- * @hw: pointer to the HW structure
- * @rar_count: receive address registers
- *
- * Set up the receive address registers by setting the base receive address
- * register to the device's MAC address and clearing all the other receive
- * address registers to 0.
- **/
-void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count)
-{
- u32 i;
- u8 mac_addr[ETH_ADDR_LEN] = {0};
-
- DEBUGFUNC("e1000_init_rx_addrs_generic");
-
- /* Setup the receive address */
- DEBUGOUT("Programming MAC Address into RAR[0]\n");
-
- hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
-
- /* Zero out the other (rar_entry_count - 1) receive addresses */
- DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1);
- for (i = 1; i < rar_count; i++)
- hw->mac.ops.rar_set(hw, mac_addr, i);
-}
-
-/**
- * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr
- * @hw: pointer to the HW structure
- *
- * Checks the nvm for an alternate MAC address. An alternate MAC address
- * can be set up by pre-boot software and must be treated like a permanent
- * address and must override the actual permanent MAC address. If an
- * alternate MAC address is found it is programmed into RAR0, replacing
- * the permanent address that was installed into RAR0 by the Si on reset.
- * This function will return SUCCESS unless it encounters an error while
- * reading the EEPROM.
- **/
-s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
-{
- u32 i;
- s32 ret_val;
- u16 offset, nvm_alt_mac_addr_offset, nvm_data;
- u8 alt_mac_addr[ETH_ADDR_LEN];
-
- DEBUGFUNC("e1000_check_alt_mac_addr_generic");
-
- ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data);
- if (ret_val)
- return ret_val;
-
-
- /* Alternate MAC address is handled by the option ROM for 82580
- * and newer. SW support not required.
- */
- if (hw->mac.type >= e1000_82580)
- return E1000_SUCCESS;
-
- ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1,
- &nvm_alt_mac_addr_offset);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
- (nvm_alt_mac_addr_offset == 0x0000))
- /* There is no Alternate MAC Address */
- return E1000_SUCCESS;
-
- if (hw->bus.func == E1000_FUNC_1)
- nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
- if (hw->bus.func == E1000_FUNC_2)
- nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2;
-
- if (hw->bus.func == E1000_FUNC_3)
- nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3;
- for (i = 0; i < ETH_ADDR_LEN; i += 2) {
- offset = nvm_alt_mac_addr_offset + (i >> 1);
- ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- alt_mac_addr[i] = (u8)(nvm_data & 0xFF);
- alt_mac_addr[i + 1] = (u8)(nvm_data >> 8);
- }
-
- /* if multicast bit is set, the alternate address will not be used */
- if (alt_mac_addr[0] & 0x01) {
- DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n");
- return E1000_SUCCESS;
- }
-
- /* We have a valid alternate MAC address, and we want to treat it the
- * same as the normal permanent MAC address stored by the HW into the
- * RAR. Do this by mapping this address into RAR0.
- */
- hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_rar_set_generic - Set receive address register
- * @hw: pointer to the HW structure
- * @addr: pointer to the receive address
- * @index: receive address array register
- *
- * Sets the receive address array register at index to the address passed
- * in by addr.
- **/
-static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
-{
- u32 rar_low, rar_high;
-
- DEBUGFUNC("e1000_rar_set_generic");
-
- /* HW expects these in little endian so we reverse the byte order
- * from network order (big endian) to little endian
- */
- rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
- ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
-
- rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
-
- /* If MAC address zero, no need to set the AV bit */
- if (rar_low || rar_high)
- rar_high |= E1000_RAH_AV;
-
- /* Some bridges will combine consecutive 32-bit writes into
- * a single burst write, which will malfunction on some parts.
- * The flushes avoid this.
- */
- E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
- E1000_WRITE_FLUSH(hw);
- E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_hash_mc_addr_generic - Generate a multicast hash value
- * @hw: pointer to the HW structure
- * @mc_addr: pointer to a multicast address
- *
- * Generates a multicast address hash value which is used to determine
- * the multicast filter table array address and new table value.
- **/
-u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr)
-{
- u32 hash_value, hash_mask;
- u8 bit_shift = 0;
-
- DEBUGFUNC("e1000_hash_mc_addr_generic");
-
- /* Register count multiplied by bits per register */
- hash_mask = (hw->mac.mta_reg_count * 32) - 1;
-
- /* For a mc_filter_type of 0, bit_shift is the number of left-shifts
- * where 0xFF would still fall within the hash mask.
- */
- while (hash_mask >> bit_shift != 0xFF)
- bit_shift++;
-
- /* The portion of the address that is used for the hash table
- * is determined by the mc_filter_type setting.
- * The algorithm is such that there is a total of 8 bits of shifting.
- * The bit_shift for a mc_filter_type of 0 represents the number of
- * left-shifts where the MSB of mc_addr[5] would still fall within
- * the hash_mask. Case 0 does this exactly. Since there are a total
- * of 8 bits of shifting, then mc_addr[4] will shift right the
- * remaining number of bits. Thus 8 - bit_shift. The rest of the
- * cases are a variation of this algorithm...essentially raising the
- * number of bits to shift mc_addr[5] left, while still keeping the
- * 8-bit shifting total.
- *
- * For example, given the following Destination MAC Address and an
- * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask),
- * we can see that the bit_shift for case 0 is 4. These are the hash
- * values resulting from each mc_filter_type...
- * [0] [1] [2] [3] [4] [5]
- * 01 AA 00 12 34 56
- * LSB MSB
- *
- * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
- * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
- * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
- * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
- */
- switch (hw->mac.mc_filter_type) {
- default:
- case 0:
- break;
- case 1:
- bit_shift += 1;
- break;
- case 2:
- bit_shift += 2;
- break;
- case 3:
- bit_shift += 4;
- break;
- }
-
- hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
- (((u16) mc_addr[5]) << bit_shift)));
-
- return hash_value;
-}
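The worked example in the comment above can be reproduced on the host. The sketch below is illustrative only (not part of the removed driver); it mirrors the hash arithmetic for the documented address 01:AA:00:12:34:56 with a 128-register MTA and prints 0x563 for filter type 0 and 0x634 for filter type 3, matching the table.

#include <stdio.h>
#include <stdint.h>

/* Host-side mirror of the hash used by e1000_hash_mc_addr_generic(). */
static uint32_t mc_hash(const uint8_t *mc_addr, uint32_t mta_reg_count,
			unsigned int filter_type)
{
	uint32_t hash_mask = (mta_reg_count * 32) - 1;
	unsigned int bit_shift = 0;

	/* Number of left-shifts that keeps 0xFF inside the hash mask. */
	while (hash_mask >> bit_shift != 0xFF)
		bit_shift++;

	/* Filter types 1..3 raise the shift by 1, 2 and 4 bits. */
	switch (filter_type) {
	case 1: bit_shift += 1; break;
	case 2: bit_shift += 2; break;
	case 3: bit_shift += 4; break;
	default: break;
	}

	return hash_mask & ((mc_addr[4] >> (8 - bit_shift)) |
			    ((uint16_t)mc_addr[5] << bit_shift));
}

int main(void)
{
	const uint8_t addr[6] = { 0x01, 0xAA, 0x00, 0x12, 0x34, 0x56 };
	unsigned int type;

	for (type = 0; type < 4; type++)
		printf("filter type %u: hash = 0x%03X\n",
		       type, mc_hash(addr, 128, type));
	return 0;	/* type 0 -> 0x563, type 3 -> 0x634 */
}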
-
-/**
- * e1000_update_mc_addr_list_generic - Update Multicast addresses
- * @hw: pointer to the HW structure
- * @mc_addr_list: array of multicast addresses to program
- * @mc_addr_count: number of multicast addresses to program
- *
- * Updates entire Multicast Table Array.
- * The caller must have a packed mc_addr_list of multicast addresses.
- **/
-void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
- u8 *mc_addr_list, u32 mc_addr_count)
-{
- u32 hash_value, hash_bit, hash_reg;
- int i;
-
- DEBUGFUNC("e1000_update_mc_addr_list_generic");
-
- /* clear mta_shadow */
- memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
-
- /* update mta_shadow from mc_addr_list */
- for (i = 0; (u32) i < mc_addr_count; i++) {
- hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list);
-
- hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
- hash_bit = hash_value & 0x1F;
-
- hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
- mc_addr_list += (ETH_ADDR_LEN);
- }
-
- /* replace the entire MTA table */
- for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
- E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
- E1000_WRITE_FLUSH(hw);
-}
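As a small illustration of the register/bit mapping used above (not part of the driver): with a 128-register MTA, the documented example hash 0x563 selects shadow register 43, bit 3.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t hash_value = 0x563;	/* example hash from the comment above */
	uint32_t mta_reg_count = 128;	/* assumed MTA size */
	uint32_t hash_reg = (hash_value >> 5) & (mta_reg_count - 1);
	uint32_t hash_bit = hash_value & 0x1F;

	/* Prints "MTA register 43, bit 3" */
	printf("MTA register %u, bit %u\n", hash_reg, hash_bit);
	return 0;
}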
-
-/**
- * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters
- * @hw: pointer to the HW structure
- *
- * Clears the base hardware counters by reading the counter registers.
- **/
-void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_clear_hw_cntrs_base_generic");
-
- E1000_READ_REG(hw, E1000_CRCERRS);
- E1000_READ_REG(hw, E1000_SYMERRS);
- E1000_READ_REG(hw, E1000_MPC);
- E1000_READ_REG(hw, E1000_SCC);
- E1000_READ_REG(hw, E1000_ECOL);
- E1000_READ_REG(hw, E1000_MCC);
- E1000_READ_REG(hw, E1000_LATECOL);
- E1000_READ_REG(hw, E1000_COLC);
- E1000_READ_REG(hw, E1000_DC);
- E1000_READ_REG(hw, E1000_SEC);
- E1000_READ_REG(hw, E1000_RLEC);
- E1000_READ_REG(hw, E1000_XONRXC);
- E1000_READ_REG(hw, E1000_XONTXC);
- E1000_READ_REG(hw, E1000_XOFFRXC);
- E1000_READ_REG(hw, E1000_XOFFTXC);
- E1000_READ_REG(hw, E1000_FCRUC);
- E1000_READ_REG(hw, E1000_GPRC);
- E1000_READ_REG(hw, E1000_BPRC);
- E1000_READ_REG(hw, E1000_MPRC);
- E1000_READ_REG(hw, E1000_GPTC);
- E1000_READ_REG(hw, E1000_GORCL);
- E1000_READ_REG(hw, E1000_GORCH);
- E1000_READ_REG(hw, E1000_GOTCL);
- E1000_READ_REG(hw, E1000_GOTCH);
- E1000_READ_REG(hw, E1000_RNBC);
- E1000_READ_REG(hw, E1000_RUC);
- E1000_READ_REG(hw, E1000_RFC);
- E1000_READ_REG(hw, E1000_ROC);
- E1000_READ_REG(hw, E1000_RJC);
- E1000_READ_REG(hw, E1000_TORL);
- E1000_READ_REG(hw, E1000_TORH);
- E1000_READ_REG(hw, E1000_TOTL);
- E1000_READ_REG(hw, E1000_TOTH);
- E1000_READ_REG(hw, E1000_TPR);
- E1000_READ_REG(hw, E1000_TPT);
- E1000_READ_REG(hw, E1000_MPTC);
- E1000_READ_REG(hw, E1000_BPTC);
-}
-
-/**
- * e1000_check_for_copper_link_generic - Check for link (Copper)
- * @hw: pointer to the HW structure
- *
- * Checks to see if the link status of the hardware has changed. If a
- * change in link status has been detected, then we read the PHY registers
- * to get the current speed/duplex if link exists.
- **/
-s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val;
- bool link;
-
- DEBUGFUNC("e1000_check_for_copper_link");
-
- /* We only want to go out to the PHY registers to see if Auto-Neg
- * has completed and/or if our link status has changed. The
- * get_link_status flag is set upon receiving a Link Status
- * Change or Rx Sequence Error interrupt.
- */
- if (!mac->get_link_status)
- return E1000_SUCCESS;
-
- /* First we want to see if the MII Status Register reports
- * link. If so, then we want to get the current speed/duplex
- * of the PHY.
- */
- ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link)
- return E1000_SUCCESS; /* No link detected */
-
- mac->get_link_status = false;
-
- /* Check if there was DownShift, must be checked
- * immediately after link-up
- */
- e1000_check_downshift_generic(hw);
-
- /* If we are forcing speed/duplex, then we simply return since
- * we have already determined whether we have link or not.
- */
- if (!mac->autoneg)
- return -E1000_ERR_CONFIG;
-
- /* Auto-Neg is enabled. Auto Speed Detection takes care
- * of MAC speed/duplex configuration. So we only need to
- * configure Collision Distance in the MAC.
- */
- mac->ops.config_collision_dist(hw);
-
- /* Configure Flow Control now that Auto-Neg has completed.
- * First, we need to restore the desired flow control
- * settings because we may have had to re-autoneg with a
- * different link partner.
- */
- ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val)
- DEBUGOUT("Error configuring flow control\n");
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_fiber_link_generic - Check for link (Fiber)
- * @hw: pointer to the HW structure
- *
- * Checks for link up on the hardware. If link is not up and we have
- * a signal, then we need to force link up.
- **/
-s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 rxcw;
- u32 ctrl;
- u32 status;
- s32 ret_val;
-
- DEBUGFUNC("e1000_check_for_fiber_link_generic");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- status = E1000_READ_REG(hw, E1000_STATUS);
- rxcw = E1000_READ_REG(hw, E1000_RXCW);
-
- /* If we don't have link (auto-negotiation failed or link partner
- * cannot auto-negotiate), the cable is plugged in (we have signal),
- * and our link partner is not trying to auto-negotiate with us (we
- * are receiving idles or data), we need to force link up. We also
- * need to give auto-negotiation time to complete, in case the cable
- * was just plugged in. The autoneg_failed flag does this.
- */
- /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
- if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) &&
- !(rxcw & E1000_RXCW_C)) {
- if (!mac->autoneg_failed) {
- mac->autoneg_failed = true;
- return E1000_SUCCESS;
- }
- DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
-
- /* Disable auto-negotiation in the TXCW register */
- E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
-
- /* Force link-up and also force full-duplex. */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- /* Configure Flow Control after forcing link up. */
- ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val) {
- DEBUGOUT("Error configuring flow control\n");
- return ret_val;
- }
- } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
- /* If we are forcing link and we are receiving /C/ ordered
- * sets, re-enable auto-negotiation in the TXCW register
- * and disable forced link in the Device Control register
- * in an attempt to auto-negotiate with our link partner.
- */
- DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
- E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
- E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
-
- mac->serdes_has_link = true;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_check_for_serdes_link_generic - Check for link (Serdes)
- * @hw: pointer to the HW structure
- *
- * Checks for link up on the hardware. If link is not up and we have
- * a signal, then we need to force link up.
- **/
-s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 rxcw;
- u32 ctrl;
- u32 status;
- s32 ret_val;
-
- DEBUGFUNC("e1000_check_for_serdes_link_generic");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- status = E1000_READ_REG(hw, E1000_STATUS);
- rxcw = E1000_READ_REG(hw, E1000_RXCW);
-
- /* If we don't have link (auto-negotiation failed or link partner
- * cannot auto-negotiate), and our link partner is not trying to
- * auto-negotiate with us (we are receiving idles or data),
- * we need to force link up. We also need to give auto-negotiation
- * time to complete.
- */
- /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
- if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) {
- if (!mac->autoneg_failed) {
- mac->autoneg_failed = true;
- return E1000_SUCCESS;
- }
- DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
-
- /* Disable auto-negotiation in the TXCW register */
- E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
-
- /* Force link-up and also force full-duplex. */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- /* Configure Flow Control after forcing link up. */
- ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val) {
- DEBUGOUT("Error configuring flow control\n");
- return ret_val;
- }
- } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
- /* If we are forcing link and we are receiving /C/ ordered
- * sets, re-enable auto-negotiation in the TXCW register
- * and disable forced link in the Device Control register
- * in an attempt to auto-negotiate with our link partner.
- */
- DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
- E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
- E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
-
- mac->serdes_has_link = true;
- } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) {
- /* If we force link for non-auto-negotiation switch, check
- * link status based on MAC synchronization for internal
- * serdes media type.
- */
- /* SYNCH bit and IV bit are sticky. */
- usec_delay(10);
- rxcw = E1000_READ_REG(hw, E1000_RXCW);
- if (rxcw & E1000_RXCW_SYNCH) {
- if (!(rxcw & E1000_RXCW_IV)) {
- mac->serdes_has_link = true;
- DEBUGOUT("SERDES: Link up - forced.\n");
- }
- } else {
- mac->serdes_has_link = false;
- DEBUGOUT("SERDES: Link down - force failed.\n");
- }
- }
-
- if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) {
- status = E1000_READ_REG(hw, E1000_STATUS);
- if (status & E1000_STATUS_LU) {
- /* SYNCH bit and IV bit are sticky, so reread rxcw. */
- usec_delay(10);
- rxcw = E1000_READ_REG(hw, E1000_RXCW);
- if (rxcw & E1000_RXCW_SYNCH) {
- if (!(rxcw & E1000_RXCW_IV)) {
- mac->serdes_has_link = true;
- DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n");
- } else {
- mac->serdes_has_link = false;
- DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n");
- }
- } else {
- mac->serdes_has_link = false;
- DEBUGOUT("SERDES: Link down - no sync.\n");
- }
- } else {
- mac->serdes_has_link = false;
- DEBUGOUT("SERDES: Link down - autoneg failed\n");
- }
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_set_default_fc_generic - Set flow control default values
- * @hw: pointer to the HW structure
- *
- * Read the EEPROM for the default values for flow control and store the
- * values.
- **/
-static s32 e1000_set_default_fc_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 nvm_data;
-
- DEBUGFUNC("e1000_set_default_fc_generic");
-
- /* Read and store word 0x0F of the EEPROM. This word contains bits
- * that determine the hardware's default PAUSE (flow control) mode,
- * a bit that determines whether the HW defaults to enabling or
- * disabling auto-negotiation, and the direction of the
- * SW defined pins. If there is no SW over-ride of the flow
- * control setting, then the variable hw->fc will
- * be initialized based on a value in the EEPROM.
- */
- ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data);
-
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if (!(nvm_data & NVM_WORD0F_PAUSE_MASK))
- hw->fc.requested_mode = e1000_fc_none;
- else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) ==
- NVM_WORD0F_ASM_DIR)
- hw->fc.requested_mode = e1000_fc_tx_pause;
- else
- hw->fc.requested_mode = e1000_fc_full;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_setup_link_generic - Setup flow control and link settings
- * @hw: pointer to the HW structure
- *
- * Determines which flow control settings to use, then configures flow
- * control. Calls the appropriate media-specific link configuration
- * function. Assuming the adapter has a valid link partner, a valid link
- * should be established. Assumes the hardware has previously been reset
- * and the transmitter and receiver are not enabled.
- **/
-s32 e1000_setup_link_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_setup_link_generic");
-
- /* In the case of the phy reset being blocked, we already have a link.
- * We do not need to set it up again.
- */
- if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
- return E1000_SUCCESS;
-
- /* If requested flow control is set to default, set flow control
- * based on the EEPROM flow control settings.
- */
- if (hw->fc.requested_mode == e1000_fc_default) {
- ret_val = e1000_set_default_fc_generic(hw);
- if (ret_val)
- return ret_val;
- }
-
- /* Save off the requested flow control mode for use later. Depending
- * on the link partner's capabilities, we may or may not use this mode.
- */
- hw->fc.current_mode = hw->fc.requested_mode;
-
- DEBUGOUT1("After fix-ups FlowControl is now = %x\n",
- hw->fc.current_mode);
-
- /* Call the necessary media_type subroutine to configure the link. */
- ret_val = hw->mac.ops.setup_physical_interface(hw);
- if (ret_val)
- return ret_val;
-
- /* Initialize the flow control address, type, and PAUSE timer
- * registers to their default values. This is done even if flow
- * control is disabled, because it does not hurt anything to
- * initialize these registers.
- */
- DEBUGOUT("Initializing the Flow Control address, type and timer regs\n");
- E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE);
- E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
- E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW);
-
- E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time);
-
- return e1000_set_fc_watermarks_generic(hw);
-}
-
-/**
- * e1000_commit_fc_settings_generic - Configure flow control
- * @hw: pointer to the HW structure
- *
- * Write the flow control settings to the Transmit Config Word Register (TXCW)
- * based on the flow control settings in e1000_mac_info.
- **/
-static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 txcw;
-
- DEBUGFUNC("e1000_commit_fc_settings_generic");
-
- /* Check for a software override of the flow control settings, and
- * setup the device accordingly. If auto-negotiation is enabled, then
- * software will have to set the "PAUSE" bits to the correct value in
- * the Transmit Config Word Register (TXCW) and re-start auto-
- * negotiation. However, if auto-negotiation is disabled, then
- * software will have to manually configure the two flow control enable
- * bits in the CTRL register.
- *
- * The possible values of the "fc" parameter are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause frames,
- * but not send pause frames).
- * 2: Tx flow control is enabled (we can send pause frames but we
- * do not support receiving pause frames).
- * 3: Both Rx and Tx flow control (symmetric) are enabled.
- */
- switch (hw->fc.current_mode) {
- case e1000_fc_none:
- /* Flow control completely disabled by a software over-ride. */
- txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
- break;
- case e1000_fc_rx_pause:
- /* Rx Flow control is enabled and Tx Flow control is disabled
- * by a software over-ride. Since there really isn't a way to
- * advertise that we are capable of Rx Pause ONLY, we will
- * advertise that we support both symmetric and asymmetric Rx
- * PAUSE. Later, we will disable the adapter's ability to send
- * PAUSE frames.
- */
- txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
- break;
- case e1000_fc_tx_pause:
- /* Tx Flow control is enabled, and Rx Flow control is disabled,
- * by a software over-ride.
- */
- txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
- break;
- case e1000_fc_full:
- /* Flow control (both Rx and Tx) is enabled by a software
- * over-ride.
- */
- txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
- break;
- default:
- DEBUGOUT("Flow control param set incorrectly\n");
- return -E1000_ERR_CONFIG;
- break;
- }
-
- E1000_WRITE_REG(hw, E1000_TXCW, txcw);
- mac->txcw = txcw;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_poll_fiber_serdes_link_generic - Poll for link up
- * @hw: pointer to the HW structure
- *
- * Polls for link up by reading the status register; if link fails to come
- * up with auto-negotiation, then the link is forced if a signal is detected.
- **/
-static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 i, status;
- s32 ret_val;
-
- DEBUGFUNC("e1000_poll_fiber_serdes_link_generic");
-
- /* If we have a signal (the cable is plugged in, or assumed true for
- * serdes media) then poll for a "Link-Up" indication in the Device
-	 * Status Register. Time-out if a link isn't seen in 500 milliseconds
-	 * (Auto-negotiation should complete in less than 500
- * milliseconds even if the other end is doing it in SW).
- */
- for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
- msec_delay(10);
- status = E1000_READ_REG(hw, E1000_STATUS);
- if (status & E1000_STATUS_LU)
- break;
- }
- if (i == FIBER_LINK_UP_LIMIT) {
- DEBUGOUT("Never got a valid link from auto-neg!!!\n");
- mac->autoneg_failed = true;
- /* AutoNeg failed to achieve a link, so we'll call
- * mac->check_for_link. This routine will force the
- * link up if we detect a signal. This will allow us to
- * communicate with non-autonegotiating link partners.
- */
- ret_val = mac->ops.check_for_link(hw);
- if (ret_val) {
- DEBUGOUT("Error while checking for link\n");
- return ret_val;
- }
- mac->autoneg_failed = false;
- } else {
- mac->autoneg_failed = false;
- DEBUGOUT("Valid Link Found\n");
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes
- * @hw: pointer to the HW structure
- *
- * Configures collision distance and flow control for fiber and serdes
- * links. Upon successful setup, poll for link.
- **/
-s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw)
-{
- u32 ctrl;
- s32 ret_val;
-
- DEBUGFUNC("e1000_setup_fiber_serdes_link_generic");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
-
- /* Take the link out of reset */
- ctrl &= ~E1000_CTRL_LRST;
-
- hw->mac.ops.config_collision_dist(hw);
-
- ret_val = e1000_commit_fc_settings_generic(hw);
- if (ret_val)
- return ret_val;
-
- /* Since auto-negotiation is enabled, take the link out of reset (the
- * link will be in reset, because we previously reset the chip). This
- * will restart auto-negotiation. If auto-negotiation is successful
- * then the link-up status bit will be set and the flow control enable
- * bits (RFCE and TFCE) will be set according to their negotiated value.
- */
- DEBUGOUT("Auto-negotiation enabled\n");
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- E1000_WRITE_FLUSH(hw);
- msec_delay(1);
-
- /* For these adapters, the SW definable pin 1 is set when the optics
- * detect a signal. If we have a signal, then poll for a "Link-Up"
- * indication.
- */
- if (hw->phy.media_type == e1000_media_type_internal_serdes ||
- (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) {
- ret_val = e1000_poll_fiber_serdes_link_generic(hw);
- } else {
- DEBUGOUT("No signal detected\n");
- }
-
- return ret_val;
-}
-
-/**
- * e1000_config_collision_dist_generic - Configure collision distance
- * @hw: pointer to the HW structure
- *
- * Configures the collision distance to the default value and is used
- * during link setup.
- **/
-static void e1000_config_collision_dist_generic(struct e1000_hw *hw)
-{
- u32 tctl;
-
- DEBUGFUNC("e1000_config_collision_dist_generic");
-
- tctl = E1000_READ_REG(hw, E1000_TCTL);
-
- tctl &= ~E1000_TCTL_COLD;
- tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
-
- E1000_WRITE_REG(hw, E1000_TCTL, tctl);
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks
- * @hw: pointer to the HW structure
- *
- * Sets the flow control high/low threshold (watermark) registers. If
- * flow control XON frame transmission is enabled, then set XON frame
- * transmission as well.
- **/
-s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw)
-{
- u32 fcrtl = 0, fcrth = 0;
-
- DEBUGFUNC("e1000_set_fc_watermarks_generic");
-
- /* Set the flow control receive threshold registers. Normally,
- * these registers will be set to a default threshold that may be
- * adjusted later by the driver's runtime code. However, if the
- * ability to transmit pause frames is not enabled, then these
- * registers will be set to 0.
- */
- if (hw->fc.current_mode & e1000_fc_tx_pause) {
- /* We need to set up the Receive Threshold high and low water
- * marks as well as (optionally) enabling the transmission of
- * XON frames.
- */
- fcrtl = hw->fc.low_water;
- if (hw->fc.send_xon)
- fcrtl |= E1000_FCRTL_XONE;
-
- fcrth = hw->fc.high_water;
- }
- E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl);
- E1000_WRITE_REG(hw, E1000_FCRTH, fcrth);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_force_mac_fc_generic - Force the MAC's flow control settings
- * @hw: pointer to the HW structure
- *
- * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the
- * device control register to reflect the adapter settings. TFCE and RFCE
- * need to be explicitly set by software when a copper PHY is used because
- * autonegotiation is managed by the PHY rather than the MAC. Software must
- * also configure these bits when link is forced on a fiber connection.
- **/
-s32 e1000_force_mac_fc_generic(struct e1000_hw *hw)
-{
- u32 ctrl;
-
- DEBUGFUNC("e1000_force_mac_fc_generic");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
-
- /* Because we didn't get link via the internal auto-negotiation
- * mechanism (we either forced link or we got link via PHY
-	 * auto-neg), we have to manually enable/disable transmit and
- * receive flow control.
- *
- * The "Case" statement below enables/disable flow control
- * according to the "hw->fc.current_mode" parameter.
- *
- * The possible values of the "fc" parameter are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause
- * frames but not send pause frames).
-	 *      2:  Tx flow control is enabled (we can send pause frames
-	 *          but we do not receive pause frames).
- * 3: Both Rx and Tx flow control (symmetric) is enabled.
- * other: No other values should be possible at this point.
- */
- DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode);
-
- switch (hw->fc.current_mode) {
- case e1000_fc_none:
- ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
- break;
- case e1000_fc_rx_pause:
- ctrl &= (~E1000_CTRL_TFCE);
- ctrl |= E1000_CTRL_RFCE;
- break;
- case e1000_fc_tx_pause:
- ctrl &= (~E1000_CTRL_RFCE);
- ctrl |= E1000_CTRL_TFCE;
- break;
- case e1000_fc_full:
- ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
- break;
- default:
- DEBUGOUT("Flow control param set incorrectly\n");
- return -E1000_ERR_CONFIG;
- }
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_config_fc_after_link_up_generic - Configures flow control after link
- * @hw: pointer to the HW structure
- *
- * Checks the status of auto-negotiation after link up to ensure that the
- * speed and duplex were not forced. If the link needed to be forced, then
- * flow control needs to be forced also. If auto-negotiation is enabled
- * and did not fail, then we configure flow control based on our link
- * partner.
- **/
-s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val = E1000_SUCCESS;
- u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg;
- u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
- u16 speed, duplex;
-
- DEBUGFUNC("e1000_config_fc_after_link_up_generic");
-
- /* Check for the case where we have fiber media and auto-neg failed
- * so we had to force link. In this case, we need to force the
- * configuration of the MAC to match the "fc" parameter.
- */
- if (mac->autoneg_failed) {
- if (hw->phy.media_type == e1000_media_type_fiber ||
- hw->phy.media_type == e1000_media_type_internal_serdes)
- ret_val = e1000_force_mac_fc_generic(hw);
- } else {
- if (hw->phy.media_type == e1000_media_type_copper)
- ret_val = e1000_force_mac_fc_generic(hw);
- }
-
- if (ret_val) {
- DEBUGOUT("Error forcing flow control settings\n");
- return ret_val;
- }
-
- /* Check for the case where we have copper media and auto-neg is
- * enabled. In this case, we need to check and see if Auto-Neg
- * has completed, and if so, how the PHY and link partner has
- * flow control configured.
- */
- if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) {
- /* Read the MII Status Register and check to see if AutoNeg
- * has completed. We read this twice because this reg has
- * some "sticky" (latched) bits.
- */
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
- if (ret_val)
- return ret_val;
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
- if (ret_val)
- return ret_val;
-
- if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
- DEBUGOUT("Copper PHY and Auto Neg has not completed.\n");
- return ret_val;
- }
-
- /* The AutoNeg process has completed, so we now need to
- * read both the Auto Negotiation Advertisement
- * Register (Address 4) and the Auto_Negotiation Base
- * Page Ability Register (Address 5) to determine how
- * flow control was negotiated.
- */
- ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
- &mii_nway_adv_reg);
- if (ret_val)
- return ret_val;
- ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
- &mii_nway_lp_ability_reg);
- if (ret_val)
- return ret_val;
-
- /* Two bits in the Auto Negotiation Advertisement Register
- * (Address 4) and two bits in the Auto Negotiation Base
- * Page Ability Register (Address 5) determine flow control
- * for both the PHY and the link partner. The following
- * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
- * 1999, describes these PAUSE resolution bits and how flow
- * control is determined based upon these settings.
- * NOTE: DC = Don't Care
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
- *-------|---------|-------|---------|--------------------
- * 0 | 0 | DC | DC | e1000_fc_none
- * 0 | 1 | 0 | DC | e1000_fc_none
- * 0 | 1 | 1 | 0 | e1000_fc_none
- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
- * 1 | 0 | 0 | DC | e1000_fc_none
- * 1 | DC | 1 | DC | e1000_fc_full
- * 1 | 1 | 0 | 0 | e1000_fc_none
- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
- *
- * Are both PAUSE bits set to 1? If so, this implies
- * Symmetric Flow Control is enabled at both ends. The
- * ASM_DIR bits are irrelevant per the spec.
- *
- * For Symmetric Flow Control:
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 1 | DC | 1 | DC | E1000_fc_full
- *
- */
- if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
- (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
- /* Now we need to check if the user selected Rx ONLY
- * of pause frames. In this case, we had to advertise
- * FULL flow control because we could not advertise Rx
- * ONLY. Hence, we must now check to see if we need to
- * turn OFF the TRANSMISSION of PAUSE frames.
- */
- if (hw->fc.requested_mode == e1000_fc_full) {
- hw->fc.current_mode = e1000_fc_full;
- DEBUGOUT("Flow Control = FULL.\n");
- } else {
- hw->fc.current_mode = e1000_fc_rx_pause;
- DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
- }
- }
- /* For receiving PAUSE frames ONLY.
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
- */
- else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
- (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
- (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
- (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
- hw->fc.current_mode = e1000_fc_tx_pause;
- DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
- }
- /* For transmitting PAUSE frames ONLY.
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
- */
- else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
- (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
- !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
- (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
- hw->fc.current_mode = e1000_fc_rx_pause;
- DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
- } else {
- /* Per the IEEE spec, at this point flow control
- * should be disabled.
- */
- hw->fc.current_mode = e1000_fc_none;
- DEBUGOUT("Flow Control = NONE.\n");
- }
-
- /* Now we need to do one last check... If we auto-
- * negotiated to HALF DUPLEX, flow control should not be
- * enabled per IEEE 802.3 spec.
- */
- ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex);
- if (ret_val) {
- DEBUGOUT("Error getting link speed and duplex\n");
- return ret_val;
- }
-
- if (duplex == HALF_DUPLEX)
- hw->fc.current_mode = e1000_fc_none;
-
- /* Now we call a subroutine to actually force the MAC
- * controller to use the correct flow control settings.
- */
- ret_val = e1000_force_mac_fc_generic(hw);
- if (ret_val) {
- DEBUGOUT("Error forcing flow control settings\n");
- return ret_val;
- }
- }
-
- /* Check for the case where we have SerDes media and auto-neg is
- * enabled. In this case, we need to check and see if Auto-Neg
- * has completed, and if so, how the PHY and link partner has
- * flow control configured.
- */
- if ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
- mac->autoneg) {
- /* Read the PCS_LSTS and check to see if AutoNeg
- * has completed.
- */
- pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT);
-
- if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) {
- DEBUGOUT("PCS Auto Neg has not completed.\n");
- return ret_val;
- }
-
- /* The AutoNeg process has completed, so we now need to
- * read both the Auto Negotiation Advertisement
- * Register (PCS_ANADV) and the Auto_Negotiation Base
- * Page Ability Register (PCS_LPAB) to determine how
- * flow control was negotiated.
- */
- pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
- pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB);
-
- /* Two bits in the Auto Negotiation Advertisement Register
- * (PCS_ANADV) and two bits in the Auto Negotiation Base
- * Page Ability Register (PCS_LPAB) determine flow control
- * for both the PHY and the link partner. The following
- * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
- * 1999, describes these PAUSE resolution bits and how flow
- * control is determined based upon these settings.
- * NOTE: DC = Don't Care
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
- *-------|---------|-------|---------|--------------------
- * 0 | 0 | DC | DC | e1000_fc_none
- * 0 | 1 | 0 | DC | e1000_fc_none
- * 0 | 1 | 1 | 0 | e1000_fc_none
- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
- * 1 | 0 | 0 | DC | e1000_fc_none
- * 1 | DC | 1 | DC | e1000_fc_full
- * 1 | 1 | 0 | 0 | e1000_fc_none
- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
- *
- * Are both PAUSE bits set to 1? If so, this implies
- * Symmetric Flow Control is enabled at both ends. The
- * ASM_DIR bits are irrelevant per the spec.
- *
- * For Symmetric Flow Control:
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 1 | DC | 1 | DC | e1000_fc_full
- *
- */
- if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
- (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) {
- /* Now we need to check if the user selected Rx ONLY
- * of pause frames. In this case, we had to advertise
- * FULL flow control because we could not advertise Rx
- * ONLY. Hence, we must now check to see if we need to
- * turn OFF the TRANSMISSION of PAUSE frames.
- */
- if (hw->fc.requested_mode == e1000_fc_full) {
- hw->fc.current_mode = e1000_fc_full;
- DEBUGOUT("Flow Control = FULL.\n");
- } else {
- hw->fc.current_mode = e1000_fc_rx_pause;
- DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
- }
- }
- /* For receiving PAUSE frames ONLY.
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
- */
- else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) &&
- (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
- (pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
- (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
- hw->fc.current_mode = e1000_fc_tx_pause;
- DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
- }
- /* For transmitting PAUSE frames ONLY.
- *
- * LOCAL DEVICE | LINK PARTNER
- * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
- *-------|---------|-------|---------|--------------------
- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
- */
- else if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
- (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
- !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
- (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
- hw->fc.current_mode = e1000_fc_rx_pause;
- DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
- } else {
- /* Per the IEEE spec, at this point flow control
- * should be disabled.
- */
- hw->fc.current_mode = e1000_fc_none;
- DEBUGOUT("Flow Control = NONE.\n");
- }
-
- /* Now we call a subroutine to actually force the MAC
- * controller to use the correct flow control settings.
- */
- pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
- pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL;
- E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg);
-
- ret_val = e1000_force_mac_fc_generic(hw);
- if (ret_val) {
- DEBUGOUT("Error forcing flow control settings\n");
- return ret_val;
- }
- }
-
- return E1000_SUCCESS;
-}
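The PAUSE/ASM_DIR resolution table quoted twice above condenses into a short decision function. The sketch below is a host-side illustration of the negotiated outcome only (register access and the half-duplex override are omitted); the names resolve_fc and rx_only_requested are placeholders, the latter standing for any requested mode other than full.

#include <stdio.h>
#include <stdbool.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

/* Resolve flow control from the advertised PAUSE/ASM_DIR bits of the local
 * device and the link partner, following the IEEE 802.3 table quoted in
 * e1000_config_fc_after_link_up_generic().
 */
static enum fc_mode resolve_fc(bool loc_pause, bool loc_asm,
			       bool lp_pause, bool lp_asm,
			       bool rx_only_requested)
{
	if (loc_pause && lp_pause)			/* symmetric */
		return rx_only_requested ? FC_RX_PAUSE : FC_FULL;
	if (!loc_pause && loc_asm && lp_pause && lp_asm)
		return FC_TX_PAUSE;			/* 0|1|1|1 row */
	if (loc_pause && loc_asm && !lp_pause && lp_asm)
		return FC_RX_PAUSE;			/* 1|1|0|1 row */
	return FC_NONE;
}

int main(void)
{
	/* "1 | DC | 1 | DC" row: both ends advertise PAUSE -> full. */
	printf("%d\n", resolve_fc(true, false, true, true, false));	/* 3 */
	/* "0 | 1 | 1 | 1" row: local asymmetric only -> Tx pause. */
	printf("%d\n", resolve_fc(false, true, true, true, false));	/* 2 */
	return 0;
}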
-
-/**
- * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex
- * @hw: pointer to the HW structure
- * @speed: stores the current speed
- * @duplex: stores the current duplex
- *
- * Read the status register for the current speed/duplex and store the current
- * speed and duplex for copper connections.
- **/
-s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
- u16 *duplex)
-{
- u32 status;
-
- DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic");
-
- status = E1000_READ_REG(hw, E1000_STATUS);
- if (status & E1000_STATUS_SPEED_1000) {
- *speed = SPEED_1000;
- DEBUGOUT("1000 Mbs, ");
- } else if (status & E1000_STATUS_SPEED_100) {
- *speed = SPEED_100;
- DEBUGOUT("100 Mbs, ");
- } else {
- *speed = SPEED_10;
- DEBUGOUT("10 Mbs, ");
- }
-
- if (status & E1000_STATUS_FD) {
- *duplex = FULL_DUPLEX;
- DEBUGOUT("Full Duplex\n");
- } else {
- *duplex = HALF_DUPLEX;
- DEBUGOUT("Half Duplex\n");
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_speed_and_duplex_fiber_serdes_generic - Retrieve current speed/duplex
- * @hw: pointer to the HW structure
- * @speed: stores the current speed
- * @duplex: stores the current duplex
- *
- * Sets the speed and duplex to gigabit full duplex (the only possible option)
- * for fiber/serdes links.
- **/
-s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 *speed, u16 *duplex)
-{
- DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic");
-
- *speed = SPEED_1000;
- *duplex = FULL_DUPLEX;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_hw_semaphore_generic - Acquire hardware semaphore
- * @hw: pointer to the HW structure
- *
- * Acquire the HW semaphore to access the PHY or NVM
- **/
-s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw)
-{
- u32 swsm;
- s32 timeout = hw->nvm.word_size + 1;
- s32 i = 0;
-
- DEBUGFUNC("e1000_get_hw_semaphore_generic");
-
- /* Get the SW semaphore */
- while (i < timeout) {
- swsm = E1000_READ_REG(hw, E1000_SWSM);
- if (!(swsm & E1000_SWSM_SMBI))
- break;
-
- usec_delay(50);
- i++;
- }
-
- if (i == timeout) {
- DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
- return -E1000_ERR_NVM;
- }
-
- /* Get the FW semaphore. */
- for (i = 0; i < timeout; i++) {
- swsm = E1000_READ_REG(hw, E1000_SWSM);
- E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
-
- /* Semaphore acquired if bit latched */
- if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
- break;
-
- usec_delay(50);
- }
-
- if (i == timeout) {
- /* Release semaphores */
- e1000_put_hw_semaphore_generic(hw);
- DEBUGOUT("Driver can't access the NVM\n");
- return -E1000_ERR_NVM;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_put_hw_semaphore_generic - Release hardware semaphore
- * @hw: pointer to the HW structure
- *
- * Release hardware semaphore used to access the PHY or NVM
- **/
-void e1000_put_hw_semaphore_generic(struct e1000_hw *hw)
-{
- u32 swsm;
-
- DEBUGFUNC("e1000_put_hw_semaphore_generic");
-
- swsm = E1000_READ_REG(hw, E1000_SWSM);
-
- swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
-
- E1000_WRITE_REG(hw, E1000_SWSM, swsm);
-}
-
-/**
- * e1000_get_auto_rd_done_generic - Check for auto read completion
- * @hw: pointer to the HW structure
- *
- * Check EEPROM for Auto Read done bit.
- **/
-s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw)
-{
- s32 i = 0;
-
- DEBUGFUNC("e1000_get_auto_rd_done_generic");
-
- while (i < AUTO_READ_DONE_TIMEOUT) {
- if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD)
- break;
- msec_delay(1);
- i++;
- }
-
- if (i == AUTO_READ_DONE_TIMEOUT) {
- DEBUGOUT("Auto read by HW from NVM has not completed.\n");
- return -E1000_ERR_RESET;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_valid_led_default_generic - Verify a valid default LED config
- * @hw: pointer to the HW structure
- * @data: pointer to the NVM (EEPROM)
- *
- * Read the EEPROM for the current default LED configuration. If the
- * LED configuration is not valid, set to a valid LED configuration.
- **/
-s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_valid_led_default_generic");
-
- ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF)
- *data = ID_LED_DEFAULT;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_id_led_init_generic - Initialize ID LED settings
- * @hw: pointer to the HW structure
- *
- **/
-s32 e1000_id_led_init_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val;
- const u32 ledctl_mask = 0x000000FF;
- const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
- const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
- u16 data, i, temp;
- const u16 led_mask = 0x0F;
-
- DEBUGFUNC("e1000_id_led_init_generic");
-
- ret_val = hw->nvm.ops.valid_led_default(hw, &data);
- if (ret_val)
- return ret_val;
-
- mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL);
- mac->ledctl_mode1 = mac->ledctl_default;
- mac->ledctl_mode2 = mac->ledctl_default;
-
- for (i = 0; i < 4; i++) {
- temp = (data >> (i << 2)) & led_mask;
- switch (temp) {
- case ID_LED_ON1_DEF2:
- case ID_LED_ON1_ON2:
- case ID_LED_ON1_OFF2:
- mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
- mac->ledctl_mode1 |= ledctl_on << (i << 3);
- break;
- case ID_LED_OFF1_DEF2:
- case ID_LED_OFF1_ON2:
- case ID_LED_OFF1_OFF2:
- mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
- mac->ledctl_mode1 |= ledctl_off << (i << 3);
- break;
- default:
- /* Do nothing */
- break;
- }
- switch (temp) {
- case ID_LED_DEF1_ON2:
- case ID_LED_ON1_ON2:
- case ID_LED_OFF1_ON2:
- mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
- mac->ledctl_mode2 |= ledctl_on << (i << 3);
- break;
- case ID_LED_DEF1_OFF2:
- case ID_LED_ON1_OFF2:
- case ID_LED_OFF1_OFF2:
- mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
- mac->ledctl_mode2 |= ledctl_off << (i << 3);
- break;
- default:
- /* Do nothing */
- break;
- }
- }
-
- return E1000_SUCCESS;
-}
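To make the shift arithmetic above easier to follow (illustrative sketch only, made-up NVM word): each LED takes one nibble of the ID LED word (shift i << 2) and one byte of LEDCTL (shift i << 3).

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t data = 0x4321;		/* made-up ID LED word from the NVM */
	unsigned int i;

	for (i = 0; i < 4; i++) {
		unsigned int nibble = (data >> (i << 2)) & 0x0F;
		unsigned int byte_shift = i << 3;

		/* LED0..LED3 -> nibbles 1,2,3,4 and LEDCTL bits 7:0 .. 31:24 */
		printf("LED%u: nibble=0x%X, LEDCTL bits %u:%u\n",
		       i, nibble, byte_shift + 7, byte_shift);
	}
	return 0;
}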
-
-/**
- * e1000_setup_led_generic - Configures SW controllable LED
- * @hw: pointer to the HW structure
- *
- * This prepares the SW controllable LED for use and saves the current state
- * of the LED so it can be later restored.
- **/
-s32 e1000_setup_led_generic(struct e1000_hw *hw)
-{
- u32 ledctl;
-
- DEBUGFUNC("e1000_setup_led_generic");
-
- if (hw->mac.ops.setup_led != e1000_setup_led_generic)
- return -E1000_ERR_CONFIG;
-
- if (hw->phy.media_type == e1000_media_type_fiber) {
- ledctl = E1000_READ_REG(hw, E1000_LEDCTL);
- hw->mac.ledctl_default = ledctl;
- /* Turn off LED0 */
- ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK |
- E1000_LEDCTL_LED0_MODE_MASK);
- ledctl |= (E1000_LEDCTL_MODE_LED_OFF <<
- E1000_LEDCTL_LED0_MODE_SHIFT);
- E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl);
- } else if (hw->phy.media_type == e1000_media_type_copper) {
- E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_cleanup_led_generic - Set LED config to default operation
- * @hw: pointer to the HW structure
- *
- * Remove the current LED configuration and set the LED configuration
- * to the default value, saved from the EEPROM.
- **/
-s32 e1000_cleanup_led_generic(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_cleanup_led_generic");
-
- E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default);
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_blink_led_generic - Blink LED
- * @hw: pointer to the HW structure
- *
- * Blink the LEDs which are set to be on.
- **/
-s32 e1000_blink_led_generic(struct e1000_hw *hw)
-{
- u32 ledctl_blink = 0;
- u32 i;
-
- DEBUGFUNC("e1000_blink_led_generic");
-
- if (hw->phy.media_type == e1000_media_type_fiber) {
- /* always blink LED0 for PCI-E fiber */
- ledctl_blink = E1000_LEDCTL_LED0_BLINK |
- (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
- } else {
- /* Set the blink bit for each LED that's "on" (0x0E)
- * (or "off" if inverted) in ledctl_mode2. The blink
- * logic in hardware only works when mode is set to "on"
- * so it must be changed accordingly when the mode is
- * "off" and inverted.
- */
- ledctl_blink = hw->mac.ledctl_mode2;
- for (i = 0; i < 32; i += 8) {
- u32 mode = (hw->mac.ledctl_mode2 >> i) &
- E1000_LEDCTL_LED0_MODE_MASK;
- u32 led_default = hw->mac.ledctl_default >> i;
-
- if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
- (mode == E1000_LEDCTL_MODE_LED_ON)) ||
- ((led_default & E1000_LEDCTL_LED0_IVRT) &&
- (mode == E1000_LEDCTL_MODE_LED_OFF))) {
- ledctl_blink &=
- ~(E1000_LEDCTL_LED0_MODE_MASK << i);
- ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
- E1000_LEDCTL_MODE_LED_ON) << i;
- }
- }
- }
-
- E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_led_on_generic - Turn LED on
- * @hw: pointer to the HW structure
- *
- * Turn LED on.
- **/
-s32 e1000_led_on_generic(struct e1000_hw *hw)
-{
- u32 ctrl;
-
- DEBUGFUNC("e1000_led_on_generic");
-
- switch (hw->phy.media_type) {
- case e1000_media_type_fiber:
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl &= ~E1000_CTRL_SWDPIN0;
- ctrl |= E1000_CTRL_SWDPIO0;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- break;
- case e1000_media_type_copper:
- E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2);
- break;
- default:
- break;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_led_off_generic - Turn LED off
- * @hw: pointer to the HW structure
- *
- * Turn LED off.
- **/
-s32 e1000_led_off_generic(struct e1000_hw *hw)
-{
- u32 ctrl;
-
- DEBUGFUNC("e1000_led_off_generic");
-
- switch (hw->phy.media_type) {
- case e1000_media_type_fiber:
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= E1000_CTRL_SWDPIN0;
- ctrl |= E1000_CTRL_SWDPIO0;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- break;
- case e1000_media_type_copper:
- E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
- break;
- default:
- break;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities
- * @hw: pointer to the HW structure
- * @no_snoop: bitmap of snoop events
- *
- * Set the PCI-Express no-snoop bits for the events enabled in 'no_snoop'.
- **/
-void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop)
-{
- u32 gcr;
-
- DEBUGFUNC("e1000_set_pcie_no_snoop_generic");
-
- if (no_snoop) {
- gcr = E1000_READ_REG(hw, E1000_GCR);
- gcr &= ~(PCIE_NO_SNOOP_ALL);
- gcr |= no_snoop;
- E1000_WRITE_REG(hw, E1000_GCR, gcr);
- }
-}
-
-/**
- * e1000_disable_pcie_master_generic - Disables PCI-express master access
- * @hw: pointer to the HW structure
- *
- * Returns E1000_SUCCESS if successful, else returns -10
- * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
- * the master requests to be disabled.
- *
- * Disables PCI-Express master access and verifies there are no pending
- * requests.
- **/
-s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw)
-{
- u32 ctrl;
- s32 timeout = MASTER_DISABLE_TIMEOUT;
-
- DEBUGFUNC("e1000_disable_pcie_master_generic");
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- while (timeout) {
- if (!(E1000_READ_REG(hw, E1000_STATUS) &
- E1000_STATUS_GIO_MASTER_ENABLE))
- break;
- usec_delay(100);
- timeout--;
- }
-
- if (!timeout) {
- DEBUGOUT("Master requests are pending.\n");
- return -E1000_ERR_MASTER_REQUESTS_PENDING;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing
- * @hw: pointer to the HW structure
- *
- * Reset the Adaptive Interframe Spacing throttle to default values.
- **/
-void e1000_reset_adaptive_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
-
- DEBUGFUNC("e1000_reset_adaptive_generic");
-
- if (!mac->adaptive_ifs) {
- DEBUGOUT("Not in Adaptive IFS mode!\n");
- return;
- }
-
- mac->current_ifs_val = 0;
- mac->ifs_min_val = IFS_MIN;
- mac->ifs_max_val = IFS_MAX;
- mac->ifs_step_size = IFS_STEP;
- mac->ifs_ratio = IFS_RATIO;
-
- mac->in_ifs_mode = false;
- E1000_WRITE_REG(hw, E1000_AIT, 0);
-}
-
-/**
- * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing
- * @hw: pointer to the HW structure
- *
- * Update the Adaptive Interframe Spacing Throttle value based on the
- * time between transmitted packets and time between collisions.
- **/
-void e1000_update_adaptive_generic(struct e1000_hw *hw)
-{
- struct e1000_mac_info *mac = &hw->mac;
-
- DEBUGFUNC("e1000_update_adaptive_generic");
-
- if (!mac->adaptive_ifs) {
- DEBUGOUT("Not in Adaptive IFS mode!\n");
- return;
- }
-
- if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) {
- if (mac->tx_packet_delta > MIN_NUM_XMITS) {
- mac->in_ifs_mode = true;
- if (mac->current_ifs_val < mac->ifs_max_val) {
- if (!mac->current_ifs_val)
- mac->current_ifs_val = mac->ifs_min_val;
- else
- mac->current_ifs_val +=
- mac->ifs_step_size;
- E1000_WRITE_REG(hw, E1000_AIT,
- mac->current_ifs_val);
- }
- }
- } else {
- if (mac->in_ifs_mode &&
- (mac->tx_packet_delta <= MIN_NUM_XMITS)) {
- mac->current_ifs_val = 0;
- mac->in_ifs_mode = false;
- E1000_WRITE_REG(hw, E1000_AIT, 0);
- }
- }
-}
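A rough host-side simulation of the throttle above is sketched below; it is not part of the driver, and the IFS_* / MIN_NUM_XMITS values are assumed defaults for this driver family rather than taken from this file. The AIT value ramps from the minimum towards the maximum while collisions dominate, and drops back to zero once traffic is light and clean.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define IFS_MIN		40	/* assumed defaults */
#define IFS_MAX		80
#define IFS_STEP	10
#define IFS_RATIO	4
#define MIN_NUM_XMITS	1000

static uint32_t current_ifs;
static bool in_ifs_mode;

/* Mirrors the update rule of e1000_update_adaptive_generic(). */
static void update_adaptive(uint32_t collision_delta, uint32_t tx_packet_delta)
{
	if (collision_delta * IFS_RATIO > tx_packet_delta) {
		if (tx_packet_delta > MIN_NUM_XMITS) {
			in_ifs_mode = true;
			if (current_ifs < IFS_MAX)
				current_ifs = current_ifs ?
					current_ifs + IFS_STEP : IFS_MIN;
		}
	} else if (in_ifs_mode && tx_packet_delta <= MIN_NUM_XMITS) {
		current_ifs = 0;
		in_ifs_mode = false;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {	/* heavy collisions: ramps 40, 50, 60, 70 */
		update_adaptive(2000, 4000);
		printf("AIT=%u\n", current_ifs);
	}
	update_adaptive(0, 500);	/* clean, low traffic: resets to 0 */
	printf("AIT=%u\n", current_ifs);
	return 0;
}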
-
-/**
- * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings
- * @hw: pointer to the HW structure
- *
- * Verify that, when auto-negotiation is not used, MDI/MDIx is correctly
- * set to a forced mode; if not, it is forced to MDI mode.
- **/
-static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw)
-{
- DEBUGFUNC("e1000_validate_mdi_setting_generic");
-
- if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) {
- DEBUGOUT("Invalid MDI setting detected\n");
- hw->phy.mdix = 1;
- return -E1000_ERR_CONFIG;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings
- * @hw: pointer to the HW structure
- *
- * Validate the MDI/MDIx setting, allowing for auto-crossover during forced
- * operation.
- **/
-s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic");
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_8bit_ctrl_reg_generic - Write an 8-bit CTRL register
- * @hw: pointer to the HW structure
- * @reg: 32bit register offset such as E1000_SCTL
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Writes an address/data control type register. There are several of these
- * and they all have the format address << 8 | data and bit 31 is polled for
- * completion.
- **/
-s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
- u32 offset, u8 data)
-{
- u32 i, regvalue = 0;
-
- DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic");
-
- /* Set up the address and data */
- regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT);
- E1000_WRITE_REG(hw, reg, regvalue);
-
-	/* Poll the ready bit to see if the write completed */
- for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) {
- usec_delay(5);
- regvalue = E1000_READ_REG(hw, reg);
- if (regvalue & E1000_GEN_CTL_READY)
- break;
- }
- if (!(regvalue & E1000_GEN_CTL_READY)) {
- DEBUGOUT1("Reg %08x did not indicate ready\n", reg);
- return -E1000_ERR_PHY;
- }
-
- return E1000_SUCCESS;
-}
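As a minimal illustration of the register format described above (not part of the driver): the data byte sits in bits 7:0, the address in bits 15:8, and bit 31 is the ready/done flag that the driver polls. The macro names below are placeholders; only the shift of 8 and the bit-31 poll come from the comment above.

#include <stdio.h>
#include <stdint.h>

#define GEN_CTL_ADDRESS_SHIFT	8		/* "address << 8 | data" */
#define GEN_CTL_READY		0x80000000u	/* bit 31, polled for completion */

int main(void)
{
	uint8_t  data = 0x5A;
	uint32_t offset = 0x23;
	uint32_t regvalue = (uint32_t)data | (offset << GEN_CTL_ADDRESS_SHIFT);

	/* Prints regvalue=0x0000235A; hardware sets bit 31 once the write is done. */
	printf("regvalue=0x%08X ready=%d\n", regvalue,
	       (regvalue & GEN_CTL_READY) != 0);
	return 0;
}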
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
deleted file mode 100644
index a3e78498..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_MAC_H_
-#define _E1000_MAC_H_
-
-void e1000_init_mac_ops_generic(struct e1000_hw *hw);
-void e1000_null_mac_generic(struct e1000_hw *hw);
-s32 e1000_null_ops_generic(struct e1000_hw *hw);
-s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d);
-bool e1000_null_mng_mode(struct e1000_hw *hw);
-void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a);
-void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b);
-void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a);
-s32 e1000_blink_led_generic(struct e1000_hw *hw);
-s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw);
-s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw);
-s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw);
-s32 e1000_cleanup_led_generic(struct e1000_hw *hw);
-s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw);
-s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw);
-s32 e1000_force_mac_fc_generic(struct e1000_hw *hw);
-s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw);
-s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw);
-void e1000_set_lan_id_single_port(struct e1000_hw *hw);
-s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw);
-s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
- u16 *duplex);
-s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw,
- u16 *speed, u16 *duplex);
-s32 e1000_id_led_init_generic(struct e1000_hw *hw);
-s32 e1000_led_on_generic(struct e1000_hw *hw);
-s32 e1000_led_off_generic(struct e1000_hw *hw);
-void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
- u8 *mc_addr_list, u32 mc_addr_count);
-s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw);
-s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw);
-s32 e1000_setup_led_generic(struct e1000_hw *hw);
-s32 e1000_setup_link_generic(struct e1000_hw *hw);
-s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw);
-s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
- u32 offset, u8 data);
-
-u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr);
-
-void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw);
-void e1000_clear_vfta_generic(struct e1000_hw *hw);
-void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count);
-void e1000_put_hw_semaphore_generic(struct e1000_hw *hw);
-s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw);
-void e1000_reset_adaptive_generic(struct e1000_hw *hw);
-void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop);
-void e1000_update_adaptive_generic(struct e1000_hw *hw);
-void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
-
-#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
deleted file mode 100644
index 2f75bc35..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
+++ /dev/null
@@ -1,539 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-/**
- * e1000_calculate_checksum - Calculate checksum for buffer
- * @buffer: pointer to EEPROM
- * @length: size of EEPROM to calculate a checksum for
- *
- * Calculates the checksum for some buffer on a specified length. The
- * checksum calculated is returned.
- **/
-u8 e1000_calculate_checksum(u8 *buffer, u32 length)
-{
- u32 i;
- u8 sum = 0;
-
- DEBUGFUNC("e1000_calculate_checksum");
-
- if (!buffer)
- return 0;
-
- for (i = 0; i < length; i++)
- sum += buffer[i];
-
- return (u8) (0 - sum);
-}
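A quick host-side check of the arithmetic above (illustrative only): the returned value is the two's complement of the byte sum, so appending it to the buffer makes the 8-bit sum of the whole region zero.

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as e1000_calculate_checksum(). */
static uint8_t calc_csum(const uint8_t *buf, uint32_t len)
{
	uint8_t sum = 0;
	uint32_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];
	return (uint8_t)(0 - sum);
}

int main(void)
{
	uint8_t buf[5] = { 0x10, 0x20, 0x30, 0x40, 0x00 };
	uint8_t sum = 0;
	unsigned int i;

	buf[4] = calc_csum(buf, 4);	/* append the checksum byte */
	for (i = 0; i < 5; i++)
		sum += buf[i];

	/* Prints checksum=0x60 total=0x00 */
	printf("checksum=0x%02X total=0x%02X\n", buf[4], sum);
	return 0;
}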
-
-/**
- * e1000_mng_enable_host_if_generic - Checks host interface is enabled
- * @hw: pointer to the HW structure
- *
- * Returns E1000_SUCCESS upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
- *
- * This function checks whether the HOST IF is enabled for command operation
- * and also checks whether the previous command has completed. It busy-waits
- * if the previous command is not yet completed.
- **/
-s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw)
-{
- u32 hicr;
- u8 i;
-
- DEBUGFUNC("e1000_mng_enable_host_if_generic");
-
- if (!hw->mac.arc_subsystem_valid) {
- DEBUGOUT("ARC subsystem not valid.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- /* Check that the host interface is enabled. */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_EN)) {
- DEBUGOUT("E1000_HOST_EN bit disabled.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
- /* check the previous command is completed */
- for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) {
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_C))
- break;
- msec_delay_irq(1);
- }
-
- if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
-		DEBUGOUT("Previous command timed out.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_check_mng_mode_generic - Generic check management mode
- * @hw: pointer to the HW structure
- *
- * Reads the firmware semaphore register and returns true (>0) if
- * manageability is enabled, else false (0).
- **/
-bool e1000_check_mng_mode_generic(struct e1000_hw *hw)
-{
- u32 fwsm = E1000_READ_REG(hw, E1000_FWSM);
-
- DEBUGFUNC("e1000_check_mng_mode_generic");
-
-
- return (fwsm & E1000_FWSM_MODE_MASK) ==
- (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT);
-}
-
-/**
- * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx
- * @hw: pointer to the HW structure
- *
- * Enables packet filtering on transmit packets if manageability is enabled
- * and host interface is enabled.
- **/
-bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw)
-{
- struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie;
- u32 *buffer = (u32 *)&hw->mng_cookie;
- u32 offset;
- s32 ret_val, hdr_csum, csum;
- u8 i, len;
-
- DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic");
-
- hw->mac.tx_pkt_filtering = true;
-
- /* No manageability, no filtering */
- if (!hw->mac.ops.check_mng_mode(hw)) {
- hw->mac.tx_pkt_filtering = false;
- return hw->mac.tx_pkt_filtering;
- }
-
- /* If we can't read from the host interface for whatever
- * reason, disable filtering.
- */
- ret_val = e1000_mng_enable_host_if_generic(hw);
- if (ret_val != E1000_SUCCESS) {
- hw->mac.tx_pkt_filtering = false;
- return hw->mac.tx_pkt_filtering;
- }
-
- /* Read in the header. Length and offset are in dwords. */
- len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2;
- offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2;
- for (i = 0; i < len; i++)
- *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
- offset + i);
- hdr_csum = hdr->checksum;
- hdr->checksum = 0;
- csum = e1000_calculate_checksum((u8 *)hdr,
- E1000_MNG_DHCP_COOKIE_LENGTH);
- /* If either the checksums or signature don't match, then
- * the cookie area isn't considered valid, in which case we
- * take the safe route of assuming Tx filtering is enabled.
- */
- if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) {
- hw->mac.tx_pkt_filtering = true;
- return hw->mac.tx_pkt_filtering;
- }
-
- /* Cookie area is valid, make the final check for filtering. */
- if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING))
- hw->mac.tx_pkt_filtering = false;
-
- return hw->mac.tx_pkt_filtering;
-}
-
-/**
- * e1000_mng_write_cmd_header_generic - Writes manageability command header
- * @hw: pointer to the HW structure
- * @hdr: pointer to the host interface command header
- *
- * Writes the command header after performing the checksum calculation.
- **/
-s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
- struct e1000_host_mng_command_header *hdr)
-{
- u16 i, length = sizeof(struct e1000_host_mng_command_header);
-
- DEBUGFUNC("e1000_mng_write_cmd_header_generic");
-
- /* Write the whole command header structure with new checksum. */
-
- hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length);
-
- length >>= 2;
- /* Write the relevant command block into the ram area. */
- for (i = 0; i < length; i++) {
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
- *((u32 *) hdr + i));
- E1000_WRITE_FLUSH(hw);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_mng_host_if_write_generic - Write to the manageability host interface
- * @hw: pointer to the HW structure
- * @buffer: pointer to the host interface buffer
- * @length: size of the buffer
- * @offset: location in the buffer to write to
- * @sum: sum of the data (not checksum)
- *
- * This function writes the buffer content at the given offset on the host
- * interface. It handles alignment so the writes are done in the most
- * efficient way, and it accumulates the sum of the data in the *sum parameter.
- **/
-s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
- u16 length, u16 offset, u8 *sum)
-{
- u8 *tmp;
- u8 *bufptr = buffer;
- u32 data = 0;
- u16 remaining, i, j, prev_bytes;
-
- DEBUGFUNC("e1000_mng_host_if_write_generic");
-
- /* sum = only sum of the data and it is not checksum */
-
- if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH)
- return -E1000_ERR_PARAM;
-
- tmp = (u8 *)&data;
- prev_bytes = offset & 0x3;
- offset >>= 2;
-
- if (prev_bytes) {
- data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset);
- for (j = prev_bytes; j < sizeof(u32); j++) {
- *(tmp + j) = *bufptr++;
- *sum += *(tmp + j);
- }
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data);
- length -= j - prev_bytes;
- offset++;
- }
-
- remaining = length & 0x3;
- length -= remaining;
-
- /* Calculate length in DWORDs */
- length >>= 2;
-
- /* The device driver writes the relevant command block into the
- * ram area.
- */
- for (i = 0; i < length; i++) {
- for (j = 0; j < sizeof(u32); j++) {
- *(tmp + j) = *bufptr++;
- *sum += *(tmp + j);
- }
-
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
- data);
- }
- if (remaining) {
- for (j = 0; j < sizeof(u32); j++) {
- if (j < remaining)
- *(tmp + j) = *bufptr++;
- else
- *(tmp + j) = 0;
-
- *sum += *(tmp + j);
- }
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
- data);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface
- * @hw: pointer to the HW structure
- * @buffer: pointer to the host interface
- * @length: size of the buffer
- *
- * Writes the DHCP information to the host interface.
- **/
-s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer,
- u16 length)
-{
- struct e1000_host_mng_command_header hdr;
- s32 ret_val;
- u32 hicr;
-
- DEBUGFUNC("e1000_mng_write_dhcp_info_generic");
-
- hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD;
- hdr.command_length = length;
- hdr.reserved1 = 0;
- hdr.reserved2 = 0;
- hdr.checksum = 0;
-
- /* Enable the host interface */
- ret_val = e1000_mng_enable_host_if_generic(hw);
- if (ret_val)
- return ret_val;
-
- /* Populate the host interface with the contents of "buffer". */
- ret_val = e1000_mng_host_if_write_generic(hw, buffer, length,
- sizeof(hdr), &(hdr.checksum));
- if (ret_val)
- return ret_val;
-
- /* Write the manageability command header */
- ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr);
- if (ret_val)
- return ret_val;
-
- /* Tell the ARC a new command is pending. */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_enable_mng_pass_thru - Check if management passthrough is needed
- * @hw: pointer to the HW structure
- *
- * Verifies whether the hardware needs to leave the interface enabled so that
- * frames can be directed to and from the management interface.
- **/
-bool e1000_enable_mng_pass_thru(struct e1000_hw *hw)
-{
- u32 manc;
- u32 fwsm, factps;
-
- DEBUGFUNC("e1000_enable_mng_pass_thru");
-
- if (!hw->mac.asf_firmware_present)
- return false;
-
- manc = E1000_READ_REG(hw, E1000_MANC);
-
- if (!(manc & E1000_MANC_RCV_TCO_EN))
- return false;
-
- if (hw->mac.has_fwsm) {
- fwsm = E1000_READ_REG(hw, E1000_FWSM);
- factps = E1000_READ_REG(hw, E1000_FACTPS);
-
- if (!(factps & E1000_FACTPS_MNGCG) &&
- ((fwsm & E1000_FWSM_MODE_MASK) ==
- (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)))
- return true;
- } else if ((manc & E1000_MANC_SMBUS_EN) &&
- !(manc & E1000_MANC_ASF_EN)) {
- return true;
- }
-
- return false;
-}
-
-/**
- * e1000_host_interface_command - Writes buffer to host interface
- * @hw: pointer to the HW structure
- * @buffer: contains a command to write
- * @length: the byte length of the buffer, must be a multiple of 4 bytes
- *
- * Writes a buffer to the Host Interface. Upon success, returns E1000_SUCCESS
- * else returns E1000_ERR_HOST_INTERFACE_COMMAND.
- **/
-s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length)
-{
- u32 hicr, i;
-
- DEBUGFUNC("e1000_host_interface_command");
-
- if (!(hw->mac.arc_subsystem_valid)) {
- DEBUGOUT("Hardware doesn't support host interface command.\n");
- return E1000_SUCCESS;
- }
-
- if (!hw->mac.asf_firmware_present) {
- DEBUGOUT("Firmware is not present.\n");
- return E1000_SUCCESS;
- }
-
- if (length == 0 || length & 0x3 ||
- length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) {
- DEBUGOUT("Buffer length failure.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- /* Check that the host interface is enabled. */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_EN)) {
- DEBUGOUT("E1000_HOST_EN bit disabled.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- /* Calculate length in DWORDs */
- length >>= 2;
-
- /* The device driver writes the relevant command block
- * into the ram area.
- */
- for (i = 0; i < length; i++)
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
- *((u32 *)buffer + i));
-
- /* Setting this bit tells the ARC that a new command is pending. */
- E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
-
- for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_C))
- break;
- msec_delay(1);
- }
-
- /* Check command successful completion. */
- if (i == E1000_HI_COMMAND_TIMEOUT ||
- (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) {
- DEBUGOUT("Command has failed with no status valid.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- for (i = 0; i < length; i++)
- *((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw,
- E1000_HOST_IF,
- i);
-
- return E1000_SUCCESS;
-}
-/**
- * e1000_load_firmware - Writes proxy FW code buffer to the host interface
- *  and executes it.
- * @hw: pointer to the HW structure
- * @buffer: contains a firmware to write
- * @length: the byte length of the buffer, must be a multiple of 4 bytes
- *
- * Upon success returns E1000_SUCCESS; returns E1000_ERR_CONFIG if not enabled
- * in HW, else returns E1000_ERR_HOST_INTERFACE_COMMAND.
- **/
-s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length)
-{
- u32 hicr, hibba, fwsm, icr, i;
-
- DEBUGFUNC("e1000_load_firmware");
-
- if (hw->mac.type < e1000_i210) {
- DEBUGOUT("Hardware doesn't support loading FW by the driver\n");
- return -E1000_ERR_CONFIG;
- }
-
- /* Check that the host interface is enabled. */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_EN)) {
- DEBUGOUT("E1000_HOST_EN bit disabled.\n");
- return -E1000_ERR_CONFIG;
- }
- if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) {
- DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n");
- return -E1000_ERR_CONFIG;
- }
-
- if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) {
- DEBUGOUT("Buffer length failure.\n");
- return -E1000_ERR_INVALID_ARGUMENT;
- }
-
- /* Clear notification from ROM-FW by reading ICR register */
- icr = E1000_READ_REG(hw, E1000_ICR_V2);
-
- /* Reset ROM-FW */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- hicr |= E1000_HICR_FW_RESET_ENABLE;
- E1000_WRITE_REG(hw, E1000_HICR, hicr);
- hicr |= E1000_HICR_FW_RESET;
- E1000_WRITE_REG(hw, E1000_HICR, hicr);
- E1000_WRITE_FLUSH(hw);
-
- /* Wait till MAC notifies about its readiness after ROM-FW reset */
- for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) {
- icr = E1000_READ_REG(hw, E1000_ICR_V2);
- if (icr & E1000_ICR_MNG)
- break;
- msec_delay(1);
- }
-
- /* Check for timeout */
- if (i == E1000_HI_COMMAND_TIMEOUT) {
- DEBUGOUT("FW reset failed.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- /* Wait till MAC is ready to accept new FW code */
- for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
- fwsm = E1000_READ_REG(hw, E1000_FWSM);
- if ((fwsm & E1000_FWSM_FW_VALID) &&
- ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT ==
- E1000_FWSM_HI_EN_ONLY_MODE))
- break;
- msec_delay(1);
- }
-
- /* Check for timeout */
- if (i == E1000_HI_COMMAND_TIMEOUT) {
- DEBUGOUT("FW reset failed.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- /* Calculate length in DWORDs */
- length >>= 2;
-
- /* The device driver writes the relevant FW code block
- * into the ram area in DWORDs via 1kB ram addressing window.
- */
- for (i = 0; i < length; i++) {
- if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) {
- /* Point to correct 1kB ram window */
- hibba = E1000_HI_FW_BASE_ADDRESS +
- ((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) *
- (i / E1000_HI_FW_BLOCK_DWORD_LENGTH));
-
- E1000_WRITE_REG(hw, E1000_HIBBA, hibba);
- }
-
- E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
- i % E1000_HI_FW_BLOCK_DWORD_LENGTH,
- *((u32 *)buffer + i));
- }
-
- /* Setting this bit tells the ARC that a new FW is ready to execute. */
- hicr = E1000_READ_REG(hw, E1000_HICR);
- E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
-
- for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
- hicr = E1000_READ_REG(hw, E1000_HICR);
- if (!(hicr & E1000_HICR_C))
- break;
- msec_delay(1);
- }
-
- /* Check for successful FW start. */
- if (i == E1000_HI_COMMAND_TIMEOUT) {
- DEBUGOUT("New FW did not start within timeout period.\n");
- return -E1000_ERR_HOST_INTERFACE_COMMAND;
- }
-
- return E1000_SUCCESS;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
deleted file mode 100644
index 9f27b934..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_MANAGE_H_
-#define _E1000_MANAGE_H_
-
-bool e1000_check_mng_mode_generic(struct e1000_hw *hw);
-bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw);
-s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw);
-s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
- u16 length, u16 offset, u8 *sum);
-s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
- struct e1000_host_mng_command_header *hdr);
-s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw,
- u8 *buffer, u16 length);
-bool e1000_enable_mng_pass_thru(struct e1000_hw *hw);
-u8 e1000_calculate_checksum(u8 *buffer, u32 length);
-s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length);
-s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length);
-
-enum e1000_mng_mode {
- e1000_mng_mode_none = 0,
- e1000_mng_mode_asf,
- e1000_mng_mode_pt,
- e1000_mng_mode_ipmi,
- e1000_mng_mode_host_if_only
-};
-
-#define E1000_FACTPS_MNGCG 0x20000000
-
-#define E1000_FWSM_MODE_MASK 0xE
-#define E1000_FWSM_MODE_SHIFT 1
-#define E1000_FWSM_FW_VALID 0x00008000
-#define E1000_FWSM_HI_EN_ONLY_MODE 0x4
-
-#define E1000_MNG_IAMT_MODE 0x3
-#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10
-#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0
-#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10
-#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64
-#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1
-#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2
-
-#define E1000_VFTA_ENTRY_SHIFT 5
-#define E1000_VFTA_ENTRY_MASK 0x7F
-#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F
-
-#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */
-#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */
-#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI cmd limit */
-#define E1000_HI_FW_BASE_ADDRESS 0x10000
-#define E1000_HI_FW_MAX_LENGTH (64 * 1024) /* Num of bytes */
-#define E1000_HI_FW_BLOCK_DWORD_LENGTH 256 /* Num of DWORDs per page */
-#define E1000_HICR_MEMORY_BASE_EN 0x200 /* MB Enable bit - RO */
-#define E1000_HICR_EN 0x01 /* Enable bit - RO */
-/* Driver sets this bit when done to put command in RAM */
-#define E1000_HICR_C 0x02
-#define E1000_HICR_SV 0x04 /* Status Validity */
-#define E1000_HICR_FW_RESET_ENABLE 0x40
-#define E1000_HICR_FW_RESET 0x80
-
-/* Intel(R) Active Management Technology signature */
-#define E1000_IAMT_SIGNATURE 0x544D4149
-
-#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
deleted file mode 100644
index 1be44349..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
+++ /dev/null
@@ -1,510 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_mbx.h"
-
-/**
- * e1000_null_mbx_check_for_flag - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG mbx_id)
-{
- DEBUGFUNC("e1000_null_mbx_check_flag");
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_mbx_transact - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw,
- u32 E1000_UNUSEDARG *msg,
- u16 E1000_UNUSEDARG size,
- u16 E1000_UNUSEDARG mbx_id)
-{
- DEBUGFUNC("e1000_null_mbx_rw_msg");
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_mbx - Reads a message from the mailbox
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @mbx_id: id of mailbox to read
- *
- * returns SUCCESS if it successfully read message from buffer
- **/
-s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_read_mbx");
-
- /* limit read to size of mailbox */
- if (size > mbx->size)
- size = mbx->size;
-
- if (mbx->ops.read)
- ret_val = mbx->ops.read(hw, msg, size, mbx_id);
-
- return ret_val;
-}
-
-/**
- * e1000_write_mbx - Write a message to the mailbox
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @mbx_id: id of mailbox to write
- *
- * returns SUCCESS if it successfully copied message into the buffer
- **/
-s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_write_mbx");
-
- if (size > mbx->size)
- ret_val = -E1000_ERR_MBX;
-
- else if (mbx->ops.write)
- ret_val = mbx->ops.write(hw, msg, size, mbx_id);
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_msg - checks to see if someone sent us mail
- * @hw: pointer to the HW structure
- * @mbx_id: id of mailbox to check
- *
- * returns SUCCESS if the Status bit was found or else ERR_MBX
- **/
-s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_msg");
-
- if (mbx->ops.check_for_msg)
- ret_val = mbx->ops.check_for_msg(hw, mbx_id);
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_ack - checks to see if someone sent us ACK
- * @hw: pointer to the HW structure
- * @mbx_id: id of mailbox to check
- *
- * returns SUCCESS if the Status bit was found or else ERR_MBX
- **/
-s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_ack");
-
- if (mbx->ops.check_for_ack)
- ret_val = mbx->ops.check_for_ack(hw, mbx_id);
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_rst - checks to see if other side has reset
- * @hw: pointer to the HW structure
- * @mbx_id: id of mailbox to check
- *
- * returns SUCCESS if the Status bit was found or else ERR_MBX
- **/
-s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_rst");
-
- if (mbx->ops.check_for_rst)
- ret_val = mbx->ops.check_for_rst(hw, mbx_id);
-
- return ret_val;
-}
-
-/**
- * e1000_poll_for_msg - Wait for message notification
- * @hw: pointer to the HW structure
- * @mbx_id: id of mailbox to write
- *
- * returns SUCCESS if it successfully received a message notification
- **/
-static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- int countdown = mbx->timeout;
-
- DEBUGFUNC("e1000_poll_for_msg");
-
- if (!countdown || !mbx->ops.check_for_msg)
- goto out;
-
- while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) {
- countdown--;
- if (!countdown)
- break;
- usec_delay(mbx->usec_delay);
- }
-
- /* if we failed, all future posted messages fail until reset */
- if (!countdown)
- mbx->timeout = 0;
-out:
- return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
-}
-
-/**
- * e1000_poll_for_ack - Wait for message acknowledgement
- * @hw: pointer to the HW structure
- * @mbx_id: id of mailbox to write
- *
- * returns SUCCESS if it successfully received a message acknowledgement
- **/
-static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- int countdown = mbx->timeout;
-
- DEBUGFUNC("e1000_poll_for_ack");
-
- if (!countdown || !mbx->ops.check_for_ack)
- goto out;
-
- while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) {
- countdown--;
- if (!countdown)
- break;
- usec_delay(mbx->usec_delay);
- }
-
- /* if we failed, all future posted messages fail until reset */
- if (!countdown)
- mbx->timeout = 0;
-out:
- return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
-}
-
-/**
- * e1000_read_posted_mbx - Wait for message notification and receive message
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @mbx_id: id of mailbox to write
- *
- * returns SUCCESS if it successfully received a message notification and
- * copied it into the receive buffer.
- **/
-s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_read_posted_mbx");
-
- if (!mbx->ops.read)
- goto out;
-
- ret_val = e1000_poll_for_msg(hw, mbx_id);
-
- /* if ack received read message, otherwise we timed out */
- if (!ret_val)
- ret_val = mbx->ops.read(hw, msg, size, mbx_id);
-out:
- return ret_val;
-}
-
-/**
- * e1000_write_posted_mbx - Write a message to the mailbox, wait for ack
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @mbx_id: id of mailbox to write
- *
- * returns SUCCESS if it successfully copied message into the buffer and
- * received an ack to that message within delay * timeout period
- **/
-s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_write_posted_mbx");
-
- /* exit if either we can't write or there isn't a defined timeout */
- if (!mbx->ops.write || !mbx->timeout)
- goto out;
-
- /* send msg */
- ret_val = mbx->ops.write(hw, msg, size, mbx_id);
-
- /* if msg sent wait until we receive an ack */
- if (!ret_val)
- ret_val = e1000_poll_for_ack(hw, mbx_id);
-out:
- return ret_val;
-}
-
-/**
- * e1000_init_mbx_ops_generic - Initialize mbx function pointers
- * @hw: pointer to the HW structure
- *
- * Sets the function pointers to no-op functions
- **/
-void e1000_init_mbx_ops_generic(struct e1000_hw *hw)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
- mbx->ops.init_params = e1000_null_ops_generic;
- mbx->ops.read = e1000_null_mbx_transact;
- mbx->ops.write = e1000_null_mbx_transact;
- mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag;
- mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag;
- mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag;
- mbx->ops.read_posted = e1000_read_posted_mbx;
- mbx->ops.write_posted = e1000_write_posted_mbx;
-}
-
-static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask)
-{
- u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR);
- s32 ret_val = -E1000_ERR_MBX;
-
- if (mbvficr & mask) {
- ret_val = E1000_SUCCESS;
- E1000_WRITE_REG(hw, E1000_MBVFICR, mask);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_msg_pf - checks to see if the VF has sent mail
- * @hw: pointer to the HW structure
- * @vf_number: the VF index
- *
- * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
- **/
-static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number)
-{
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_msg_pf");
-
- if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) {
- ret_val = E1000_SUCCESS;
- hw->mbx.stats.reqs++;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_ack_pf - checks to see if the VF has ACKed
- * @hw: pointer to the HW structure
- * @vf_number: the VF index
- *
- * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
- **/
-static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number)
-{
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_ack_pf");
-
- if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) {
- ret_val = E1000_SUCCESS;
- hw->mbx.stats.acks++;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_check_for_rst_pf - checks to see if the VF has reset
- * @hw: pointer to the HW structure
- * @vf_number: the VF index
- *
- * returns SUCCESS if the VF has set the Status bit or else ERR_MBX
- **/
-static s32 e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number)
-{
- u32 vflre = E1000_READ_REG(hw, E1000_VFLRE);
- s32 ret_val = -E1000_ERR_MBX;
-
- DEBUGFUNC("e1000_check_for_rst_pf");
-
- if (vflre & (1 << vf_number)) {
- ret_val = E1000_SUCCESS;
- E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number));
- hw->mbx.stats.rsts++;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_obtain_mbx_lock_pf - obtain mailbox lock
- * @hw: pointer to the HW structure
- * @vf_number: the VF index
- *
- * return SUCCESS if we obtained the mailbox lock
- **/
-static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
-{
- s32 ret_val = -E1000_ERR_MBX;
- u32 p2v_mailbox;
-
- DEBUGFUNC("e1000_obtain_mbx_lock_pf");
-
- /* Take ownership of the buffer */
- E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
-
- /* reserve mailbox for vf use */
- p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
- if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
- ret_val = E1000_SUCCESS;
-
- return ret_val;
-}
-
-/**
- * e1000_write_mbx_pf - Places a message in the mailbox
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @vf_number: the VF index
- *
- * returns SUCCESS if it successfully copied message into the buffer
- **/
-static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
- u16 vf_number)
-{
- s32 ret_val;
- u16 i;
-
- DEBUGFUNC("e1000_write_mbx_pf");
-
- /* lock the mailbox to prevent pf/vf race condition */
- ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
- if (ret_val)
- goto out_no_write;
-
- /* flush msg and acks as we are overwriting the message buffer */
- e1000_check_for_msg_pf(hw, vf_number);
- e1000_check_for_ack_pf(hw, vf_number);
-
- /* copy the caller specified message to the mailbox memory buffer */
- for (i = 0; i < size; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]);
-
- /* Interrupt VF to tell it a message has been sent and release buffer*/
- E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS);
-
- /* update stats */
- hw->mbx.stats.msgs_tx++;
-
-out_no_write:
- return ret_val;
-
-}
-
-/**
- * e1000_read_mbx_pf - Read a message from the mailbox
- * @hw: pointer to the HW structure
- * @msg: The message buffer
- * @size: Length of buffer
- * @vf_number: the VF index
- *
- * This function copies a message from the mailbox buffer to the caller's
- * memory buffer. The presumption is that the caller knows that there was
- * a message due to a VF request so no polling for message is needed.
- **/
-static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
- u16 vf_number)
-{
- s32 ret_val;
- u16 i;
-
- DEBUGFUNC("e1000_read_mbx_pf");
-
- /* lock the mailbox to prevent pf/vf race condition */
- ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
- if (ret_val)
- goto out_no_read;
-
- /* copy the message to the mailbox memory buffer */
- for (i = 0; i < size; i++)
- msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i);
-
- /* Acknowledge the message and release buffer */
- E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
-
- /* update stats */
- hw->mbx.stats.msgs_rx++;
-
-out_no_read:
- return ret_val;
-}
-
-/**
- * e1000_init_mbx_params_pf - set initial values for pf mailbox
- * @hw: pointer to the HW structure
- *
- * Initializes the hw->mbx struct to correct values for pf mailbox
- */
-s32 e1000_init_mbx_params_pf(struct e1000_hw *hw)
-{
- struct e1000_mbx_info *mbx = &hw->mbx;
-
- switch (hw->mac.type) {
- case e1000_82576:
- case e1000_i350:
- case e1000_i354:
- mbx->timeout = 0;
- mbx->usec_delay = 0;
-
- mbx->size = E1000_VFMAILBOX_SIZE;
-
- mbx->ops.read = e1000_read_mbx_pf;
- mbx->ops.write = e1000_write_mbx_pf;
- mbx->ops.read_posted = e1000_read_posted_mbx;
- mbx->ops.write_posted = e1000_write_posted_mbx;
- mbx->ops.check_for_msg = e1000_check_for_msg_pf;
- mbx->ops.check_for_ack = e1000_check_for_ack_pf;
- mbx->ops.check_for_rst = e1000_check_for_rst_pf;
-
- mbx->stats.msgs_tx = 0;
- mbx->stats.msgs_rx = 0;
- mbx->stats.reqs = 0;
- mbx->stats.acks = 0;
- mbx->stats.rsts = 0;
- default:
- return E1000_SUCCESS;
- }
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
deleted file mode 100644
index 5951f18f..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_MBX_H_
-#define _E1000_MBX_H_
-
-#include "e1000_api.h"
-
-#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */
-#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
-#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
-#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
-#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
-
-#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */
-#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
-#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */
-#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
-
-#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
-
-/* If it's an E1000_VF_* msg then it originates in the VF and is sent to the
- * PF. The reverse is true if it is E1000_PF_*.
- * Message ACKs are the value OR'd with 0xF0000000
- */
-/* Msgs below or'd with this are the ACK */
-#define E1000_VT_MSGTYPE_ACK 0x80000000
-/* Msgs below or'd with this are the NACK */
-#define E1000_VT_MSGTYPE_NACK 0x40000000
-/* Indicates that VF is still clear to send requests */
-#define E1000_VT_MSGTYPE_CTS 0x20000000
-#define E1000_VT_MSGINFO_SHIFT 16
-/* bits 23:16 are used for extra info for certain messages */
-#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
-
-#define E1000_VF_RESET 0x01 /* VF requests reset */
-#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */
-#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */
-#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT)
-#define E1000_VF_SET_MULTICAST_OVERFLOW (0x80 << E1000_VT_MSGINFO_SHIFT)
-#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */
-#define E1000_VF_SET_VLAN_ADD (0x01 << E1000_VT_MSGINFO_SHIFT)
-#define E1000_VF_SET_LPE 0x05 /* reqs to set VMOLR.LPE */
-#define E1000_VF_SET_PROMISC 0x06 /* reqs to clear VMOLR.ROPE/MPME*/
-#define E1000_VF_SET_PROMISC_UNICAST (0x01 << E1000_VT_MSGINFO_SHIFT)
-#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT)
-
-#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */
-
-#define E1000_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
-#define E1000_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
-
-s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 e1000_check_for_msg(struct e1000_hw *, u16);
-s32 e1000_check_for_ack(struct e1000_hw *, u16);
-s32 e1000_check_for_rst(struct e1000_hw *, u16);
-void e1000_init_mbx_ops_generic(struct e1000_hw *hw);
-s32 e1000_init_mbx_params_pf(struct e1000_hw *);
-
-#endif /* _E1000_MBX_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
deleted file mode 100644
index 78c3fc0e..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
+++ /dev/null
@@ -1,950 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-static void e1000_reload_nvm_generic(struct e1000_hw *hw);
-
-/**
- * e1000_init_nvm_ops_generic - Initialize NVM function pointers
- * @hw: pointer to the HW structure
- *
- * Sets up the function pointers to no-op functions
- **/
-void e1000_init_nvm_ops_generic(struct e1000_hw *hw)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- DEBUGFUNC("e1000_init_nvm_ops_generic");
-
- /* Initialize function pointers */
- nvm->ops.init_params = e1000_null_ops_generic;
- nvm->ops.acquire = e1000_null_ops_generic;
- nvm->ops.read = e1000_null_read_nvm;
- nvm->ops.release = e1000_null_nvm_generic;
- nvm->ops.reload = e1000_reload_nvm_generic;
- nvm->ops.update = e1000_null_ops_generic;
- nvm->ops.valid_led_default = e1000_null_led_default;
- nvm->ops.validate = e1000_null_ops_generic;
- nvm->ops.write = e1000_null_write_nvm;
-}
-
-/**
- * e1000_null_read_nvm - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
- u16 E1000_UNUSEDARG *c)
-{
- DEBUGFUNC("e1000_null_read_nvm");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_nvm_generic - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_null_nvm_generic");
- return;
-}
-
-/**
- * e1000_null_led_default - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG *data)
-{
- DEBUGFUNC("e1000_null_led_default");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_write_nvm - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
- u16 E1000_UNUSEDARG *c)
-{
- DEBUGFUNC("e1000_null_write_nvm");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_raise_eec_clk - Raise EEPROM clock
- * @hw: pointer to the HW structure
- * @eecd: pointer to the EECD register value
- *
- * Enable/Raise the EEPROM clock bit.
- **/
-static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
-{
- *eecd = *eecd | E1000_EECD_SK;
- E1000_WRITE_REG(hw, E1000_EECD, *eecd);
- E1000_WRITE_FLUSH(hw);
- usec_delay(hw->nvm.delay_usec);
-}
-
-/**
- * e1000_lower_eec_clk - Lower EEPROM clock
- * @hw: pointer to the HW structure
- * @eecd: pointer to the EECD register value
- *
- * Clear/Lower the EEPROM clock bit.
- **/
-static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
-{
- *eecd = *eecd & ~E1000_EECD_SK;
- E1000_WRITE_REG(hw, E1000_EECD, *eecd);
- E1000_WRITE_FLUSH(hw);
- usec_delay(hw->nvm.delay_usec);
-}
-
-/**
- * e1000_shift_out_eec_bits - Shift data bits out to the EEPROM
- * @hw: pointer to the HW structure
- * @data: data to send to the EEPROM
- * @count: number of bits to shift out
- *
- * We need to shift 'count' bits out to the EEPROM. So, the value in the
- * "data" parameter will be shifted out to the EEPROM one bit at a time.
- * In order to do this, "data" must be broken down into bits.
- **/
-static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- u32 mask;
-
- DEBUGFUNC("e1000_shift_out_eec_bits");
-
- mask = 0x01 << (count - 1);
- if (nvm->type == e1000_nvm_eeprom_spi)
- eecd |= E1000_EECD_DO;
-
- do {
- eecd &= ~E1000_EECD_DI;
-
- if (data & mask)
- eecd |= E1000_EECD_DI;
-
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
- E1000_WRITE_FLUSH(hw);
-
- usec_delay(nvm->delay_usec);
-
- e1000_raise_eec_clk(hw, &eecd);
- e1000_lower_eec_clk(hw, &eecd);
-
- mask >>= 1;
- } while (mask);
-
- eecd &= ~E1000_EECD_DI;
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
-}
-
-/**
- * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM
- * @hw: pointer to the HW structure
- * @count: number of bits to shift in
- *
- * In order to read a register from the EEPROM, we need to shift 'count' bits
- * in from the EEPROM. Bits are "shifted in" by raising the clock input to
- * the EEPROM (setting the SK bit), and then reading the value of the data out
- * "DO" bit. During this "shifting in" process the data in "DI" bit should
- * always be clear.
- **/
-static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
-{
- u32 eecd;
- u32 i;
- u16 data;
-
- DEBUGFUNC("e1000_shift_in_eec_bits");
-
- eecd = E1000_READ_REG(hw, E1000_EECD);
-
- eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
- data = 0;
-
- for (i = 0; i < count; i++) {
- data <<= 1;
- e1000_raise_eec_clk(hw, &eecd);
-
- eecd = E1000_READ_REG(hw, E1000_EECD);
-
- eecd &= ~E1000_EECD_DI;
- if (eecd & E1000_EECD_DO)
- data |= 1;
-
- e1000_lower_eec_clk(hw, &eecd);
- }
-
- return data;
-}
-
-/**
- * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion
- * @hw: pointer to the HW structure
- * @ee_reg: EEPROM flag for polling
- *
- * Polls the EEPROM status bit for either read or write completion based
- * upon the value of 'ee_reg'.
- **/
-s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
-{
- u32 attempts = 100000;
- u32 i, reg = 0;
-
- DEBUGFUNC("e1000_poll_eerd_eewr_done");
-
- for (i = 0; i < attempts; i++) {
- if (ee_reg == E1000_NVM_POLL_READ)
- reg = E1000_READ_REG(hw, E1000_EERD);
- else
- reg = E1000_READ_REG(hw, E1000_EEWR);
-
- if (reg & E1000_NVM_RW_REG_DONE)
- return E1000_SUCCESS;
-
- usec_delay(5);
- }
-
- return -E1000_ERR_NVM;
-}
-
-/**
- * e1000_acquire_nvm_generic - Generic request for access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Set the EEPROM access request bit and wait for EEPROM access grant bit.
- * Return successful if access grant bit set, else clear the request for
- * EEPROM access and return -E1000_ERR_NVM (-1).
- **/
-s32 e1000_acquire_nvm_generic(struct e1000_hw *hw)
-{
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- s32 timeout = E1000_NVM_GRANT_ATTEMPTS;
-
- DEBUGFUNC("e1000_acquire_nvm_generic");
-
- E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ);
- eecd = E1000_READ_REG(hw, E1000_EECD);
-
- while (timeout) {
- if (eecd & E1000_EECD_GNT)
- break;
- usec_delay(5);
- eecd = E1000_READ_REG(hw, E1000_EECD);
- timeout--;
- }
-
- if (!timeout) {
- eecd &= ~E1000_EECD_REQ;
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
- DEBUGOUT("Could not acquire NVM grant\n");
- return -E1000_ERR_NVM;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_standby_nvm - Return EEPROM to standby state
- * @hw: pointer to the HW structure
- *
- * Return the EEPROM to a standby state.
- **/
-static void e1000_standby_nvm(struct e1000_hw *hw)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
-
- DEBUGFUNC("e1000_standby_nvm");
-
- if (nvm->type == e1000_nvm_eeprom_spi) {
- /* Toggle CS to flush commands */
- eecd |= E1000_EECD_CS;
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
- E1000_WRITE_FLUSH(hw);
- usec_delay(nvm->delay_usec);
- eecd &= ~E1000_EECD_CS;
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
- E1000_WRITE_FLUSH(hw);
- usec_delay(nvm->delay_usec);
- }
-}
-
-/**
- * e1000_stop_nvm - Terminate EEPROM command
- * @hw: pointer to the HW structure
- *
- * Terminates the current command by inverting the EEPROM's chip select pin.
- **/
-static void e1000_stop_nvm(struct e1000_hw *hw)
-{
- u32 eecd;
-
- DEBUGFUNC("e1000_stop_nvm");
-
- eecd = E1000_READ_REG(hw, E1000_EECD);
- if (hw->nvm.type == e1000_nvm_eeprom_spi) {
- /* Pull CS high */
- eecd |= E1000_EECD_CS;
- e1000_lower_eec_clk(hw, &eecd);
- }
-}
-
-/**
- * e1000_release_nvm_generic - Release exclusive access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Stop any current commands to the EEPROM and clear the EEPROM request bit.
- **/
-void e1000_release_nvm_generic(struct e1000_hw *hw)
-{
- u32 eecd;
-
- DEBUGFUNC("e1000_release_nvm_generic");
-
- e1000_stop_nvm(hw);
-
- eecd = E1000_READ_REG(hw, E1000_EECD);
- eecd &= ~E1000_EECD_REQ;
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
-}
-
-/**
- * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write
- * @hw: pointer to the HW structure
- *
- * Sets up the EEPROM for reading and writing.
- **/
-static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 eecd = E1000_READ_REG(hw, E1000_EECD);
- u8 spi_stat_reg;
-
- DEBUGFUNC("e1000_ready_nvm_eeprom");
-
- if (nvm->type == e1000_nvm_eeprom_spi) {
- u16 timeout = NVM_MAX_RETRY_SPI;
-
- /* Clear SK and CS */
- eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
- E1000_WRITE_REG(hw, E1000_EECD, eecd);
- E1000_WRITE_FLUSH(hw);
- usec_delay(1);
-
- /* Read "Status Register" repeatedly until the LSB is cleared.
- * The EEPROM will signal that the command has been completed
- * by clearing bit 0 of the internal status register. If it's
- * not cleared within 'timeout', then error out.
- */
- while (timeout) {
- e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
- hw->nvm.opcode_bits);
- spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8);
- if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
- break;
-
- usec_delay(5);
- e1000_standby_nvm(hw);
- timeout--;
- }
-
- if (!timeout) {
- DEBUGOUT("SPI NVM Status error\n");
- return -E1000_ERR_NVM;
- }
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_nvm_spi - Read EEPROM using SPI
- * @hw: pointer to the HW structure
- * @offset: offset of word in the EEPROM to read
- * @words: number of words to read
- * @data: word read from the EEPROM
- *
- * Reads a 16 bit word from the EEPROM.
- **/
-s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 i = 0;
- s32 ret_val;
- u16 word_in;
- u8 read_opcode = NVM_READ_OPCODE_SPI;
-
- DEBUGFUNC("e1000_read_nvm_spi");
-
- /* A check for invalid values: offset too large, too many words,
- * and not enough words.
- */
- if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
- (words == 0)) {
- DEBUGOUT("nvm parameter(s) out of bounds\n");
- return -E1000_ERR_NVM;
- }
-
- ret_val = nvm->ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_ready_nvm_eeprom(hw);
- if (ret_val)
- goto release;
-
- e1000_standby_nvm(hw);
-
- if ((nvm->address_bits == 8) && (offset >= 128))
- read_opcode |= NVM_A8_OPCODE_SPI;
-
- /* Send the READ command (opcode + addr) */
- e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits);
- e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits);
-
- /* Read the data. SPI NVMs increment the address with each byte
- * read and will roll over if reading beyond the end. This allows
- * us to read the whole NVM from any offset
- */
- for (i = 0; i < words; i++) {
- word_in = e1000_shift_in_eec_bits(hw, 16);
- data[i] = (word_in >> 8) | (word_in << 8);
- }
-
-release:
- nvm->ops.release(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_read_nvm_eerd - Reads EEPROM using EERD register
- * @hw: pointer to the HW structure
- * @offset: offset of word in the EEPROM to read
- * @words: number of words to read
- * @data: word read from the EEPROM
- *
- * Reads a 16 bit word from the EEPROM using the EERD register.
- **/
-s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- u32 i, eerd = 0;
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_read_nvm_eerd");
-
- /* A check for invalid values: offset too large, too many words,
- * too many words for the offset, and not enough words.
- */
- if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
- (words == 0)) {
- DEBUGOUT("nvm parameter(s) out of bounds\n");
- return -E1000_ERR_NVM;
- }
-
- for (i = 0; i < words; i++) {
- eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) +
- E1000_NVM_RW_REG_START;
-
- E1000_WRITE_REG(hw, E1000_EERD, eerd);
- ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
- if (ret_val)
- break;
-
- data[i] = (E1000_READ_REG(hw, E1000_EERD) >>
- E1000_NVM_RW_REG_DATA);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_write_nvm_spi - Write to EEPROM using SPI
- * @hw: pointer to the HW structure
- * @offset: offset within the EEPROM to be written to
- * @words: number of words to write
- * @data: 16 bit word(s) to be written to the EEPROM
- *
- * Writes data to EEPROM at offset using SPI interface.
- *
- * If e1000_update_nvm_checksum is not called after this function, the
- * EEPROM will most likely contain an invalid checksum.
- **/
-s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
-{
- struct e1000_nvm_info *nvm = &hw->nvm;
- s32 ret_val = -E1000_ERR_NVM;
- u16 widx = 0;
-
- DEBUGFUNC("e1000_write_nvm_spi");
-
- /* A check for invalid values: offset too large, too many words,
- * and not enough words.
- */
- if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
- (words == 0)) {
- DEBUGOUT("nvm parameter(s) out of bounds\n");
- return -E1000_ERR_NVM;
- }
-
- while (widx < words) {
- u8 write_opcode = NVM_WRITE_OPCODE_SPI;
-
- ret_val = nvm->ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_ready_nvm_eeprom(hw);
- if (ret_val) {
- nvm->ops.release(hw);
- return ret_val;
- }
-
- e1000_standby_nvm(hw);
-
- /* Send the WRITE ENABLE command (8 bit opcode) */
- e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
- nvm->opcode_bits);
-
- e1000_standby_nvm(hw);
-
- /* Some SPI eeproms use the 8th address bit embedded in the
- * opcode
- */
- if ((nvm->address_bits == 8) && (offset >= 128))
- write_opcode |= NVM_A8_OPCODE_SPI;
-
- /* Send the Write command (8-bit opcode + addr) */
- e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
- e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
- nvm->address_bits);
-
- /* Loop to allow for up to whole page write of eeprom */
- while (widx < words) {
- u16 word_out = data[widx];
- word_out = (word_out >> 8) | (word_out << 8);
- e1000_shift_out_eec_bits(hw, word_out, 16);
- widx++;
-
- if ((((offset + widx) * 2) % nvm->page_size) == 0) {
- e1000_standby_nvm(hw);
- break;
- }
- }
- msec_delay(10);
- nvm->ops.release(hw);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_read_pba_string_generic - Read device part number
- * @hw: pointer to the HW structure
- * @pba_num: pointer to device part number
- * @pba_num_size: size of part number buffer
- *
- * Reads the product board assembly (PBA) number from the EEPROM and stores
- * the value in pba_num.
- **/
-s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
- u32 pba_num_size)
-{
- s32 ret_val;
- u16 nvm_data;
- u16 pba_ptr;
- u16 offset;
- u16 length;
-
- DEBUGFUNC("e1000_read_pba_string_generic");
-
- if (pba_num == NULL) {
- DEBUGOUT("PBA string buffer was null\n");
- return -E1000_ERR_INVALID_ARGUMENT;
- }
-
- ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- /* if nvm_data is not ptr guard the PBA must be in legacy format which
- * means pba_ptr is actually our second data word for the PBA number
- * and we can decode it into an ascii string
- */
- if (nvm_data != NVM_PBA_PTR_GUARD) {
- DEBUGOUT("NVM PBA number is not stored as string\n");
-
- /* make sure callers buffer is big enough to store the PBA */
- if (pba_num_size < E1000_PBANUM_LENGTH) {
- DEBUGOUT("PBA string buffer too small\n");
- return E1000_ERR_NO_SPACE;
- }
-
- /* extract hex string from data and pba_ptr */
- pba_num[0] = (nvm_data >> 12) & 0xF;
- pba_num[1] = (nvm_data >> 8) & 0xF;
- pba_num[2] = (nvm_data >> 4) & 0xF;
- pba_num[3] = nvm_data & 0xF;
- pba_num[4] = (pba_ptr >> 12) & 0xF;
- pba_num[5] = (pba_ptr >> 8) & 0xF;
- pba_num[6] = '-';
- pba_num[7] = 0;
- pba_num[8] = (pba_ptr >> 4) & 0xF;
- pba_num[9] = pba_ptr & 0xF;
-
- /* put a null character on the end of our string */
- pba_num[10] = '\0';
-
- /* switch all the data but the '-' to hex char */
- for (offset = 0; offset < 10; offset++) {
- if (pba_num[offset] < 0xA)
- pba_num[offset] += '0';
- else if (pba_num[offset] < 0x10)
- pba_num[offset] += 'A' - 0xA;
- }
-
- return E1000_SUCCESS;
- }
-
- ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if (length == 0xFFFF || length == 0) {
- DEBUGOUT("NVM PBA number section invalid length\n");
- return -E1000_ERR_NVM_PBA_SECTION;
- }
- /* check if pba_num buffer is big enough */
- if (pba_num_size < (((u32)length * 2) - 1)) {
- DEBUGOUT("PBA string buffer too small\n");
- return -E1000_ERR_NO_SPACE;
- }
-
- /* trim pba length from start of string */
- pba_ptr++;
- length--;
-
- for (offset = 0; offset < length; offset++) {
- ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
- pba_num[offset * 2] = (u8)(nvm_data >> 8);
- pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
- }
- pba_num[offset * 2] = '\0';
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_pba_length_generic - Read device part number length
- * @hw: pointer to the HW structure
- * @pba_num_size: size of part number buffer
- *
- * Reads the product board assembly (PBA) number length from the EEPROM and
- * stores the value in pba_num_size.
- **/
-s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size)
-{
- s32 ret_val;
- u16 nvm_data;
- u16 pba_ptr;
- u16 length;
-
- DEBUGFUNC("e1000_read_pba_length_generic");
-
- if (pba_num_size == NULL) {
- DEBUGOUT("PBA buffer size was null\n");
- return -E1000_ERR_INVALID_ARGUMENT;
- }
-
- ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- /* if data is not ptr guard the PBA must be in legacy format */
- if (nvm_data != NVM_PBA_PTR_GUARD) {
- *pba_num_size = E1000_PBANUM_LENGTH;
- return E1000_SUCCESS;
- }
-
- ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
-
- if (length == 0xFFFF || length == 0) {
- DEBUGOUT("NVM PBA number section invalid length\n");
- return -E1000_ERR_NVM_PBA_SECTION;
- }
-
- /* Convert from length in u16 values to u8 chars, add 1 for NULL,
- * and subtract 2 because length field is included in length.
- */
- *pba_num_size = ((u32)length * 2) - 1;
-
- return E1000_SUCCESS;
-}
-
-
-
-
-
-/**
- * e1000_read_mac_addr_generic - Read device MAC address
- * @hw: pointer to the HW structure
- *
- * Reads the device MAC address from the EEPROM and stores the value.
- * Since devices with two ports use the same EEPROM, we increment the
- * last bit in the MAC address for the second port.
- **/
-s32 e1000_read_mac_addr_generic(struct e1000_hw *hw)
-{
- u32 rar_high;
- u32 rar_low;
- u16 i;
-
- rar_high = E1000_READ_REG(hw, E1000_RAH(0));
- rar_low = E1000_READ_REG(hw, E1000_RAL(0));
-
- for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
- hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8));
-
- for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
- hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8));
-
- for (i = 0; i < ETH_ADDR_LEN; i++)
- hw->mac.addr[i] = hw->mac.perm_addr[i];
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
- * and then verifies that the sum of the EEPROM is equal to 0xBABA.
- **/
-s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 checksum = 0;
- u16 i, nvm_data;
-
- DEBUGFUNC("e1000_validate_nvm_checksum_generic");
-
- for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
- ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error\n");
- return ret_val;
- }
- checksum += nvm_data;
- }
-
- if (checksum != (u16) NVM_SUM) {
- DEBUGOUT("NVM Checksum Invalid\n");
- return -E1000_ERR_NVM;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_update_nvm_checksum_generic - Update EEPROM checksum
- * @hw: pointer to the HW structure
- *
- * Updates the EEPROM checksum by reading/adding each word of the EEPROM
- * up to the checksum. Then calculates the EEPROM checksum and writes the
- * value to the EEPROM.
- **/
-s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 checksum = 0;
- u16 i, nvm_data;
-
- DEBUGFUNC("e1000_update_nvm_checksum");
-
- for (i = 0; i < NVM_CHECKSUM_REG; i++) {
- ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
- if (ret_val) {
- DEBUGOUT("NVM Read Error while updating checksum.\n");
- return ret_val;
- }
- checksum += nvm_data;
- }
- checksum = (u16) NVM_SUM - checksum;
- ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
- if (ret_val)
- DEBUGOUT("NVM Write Error while updating checksum.\n");
-
- return ret_val;
-}
-
-/**
- * e1000_reload_nvm_generic - Reloads EEPROM
- * @hw: pointer to the HW structure
- *
- * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
- * extended control register.
- **/
-static void e1000_reload_nvm_generic(struct e1000_hw *hw)
-{
- u32 ctrl_ext;
-
- DEBUGFUNC("e1000_reload_nvm_generic");
-
- usec_delay(10);
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- ctrl_ext |= E1000_CTRL_EXT_EE_RST;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * e1000_get_fw_version - Get firmware version information
- * @hw: pointer to the HW structure
- * @fw_vers: pointer to output version structure
- *
- * Unsupported or absent features return 0 in the version structure.
- **/
-void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
-{
- u16 eeprom_verh, eeprom_verl, etrack_test, fw_version;
- u8 q, hval, rem, result;
- u16 comb_verh, comb_verl, comb_offset;
-
- memset(fw_vers, 0, sizeof(struct e1000_fw_version));
-
- /* basic eeprom version numbers, bits used vary by part and by tool
- * used to create the nvm images */
- /* Check which data format we have */
- hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
- switch (hw->mac.type) {
- case e1000_i211:
- e1000_read_invm_version(hw, fw_vers);
- return;
- case e1000_82575:
- case e1000_82576:
- case e1000_82580:
- /* Use this format, unless EETRACK ID exists,
- * then use alternate format
- */
- if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
- hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
- >> NVM_MAJOR_SHIFT;
- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
- >> NVM_MINOR_SHIFT;
- fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
- goto etrack_id;
- }
- break;
- case e1000_i210:
- if (!(e1000_get_flash_presence_i210(hw))) {
- e1000_read_invm_version(hw, fw_vers);
- return;
- }
- /* fall through */
- case e1000_i350:
- case e1000_i354:
- /* find combo image version */
- hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
- if ((comb_offset != 0x0) &&
- (comb_offset != NVM_VER_INVALID)) {
-
- hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
- + 1), 1, &comb_verh);
- hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
- 1, &comb_verl);
-
- /* get Option Rom version if it exists and is valid */
- if ((comb_verh && comb_verl) &&
- ((comb_verh != NVM_VER_INVALID) &&
- (comb_verl != NVM_VER_INVALID))) {
-
- fw_vers->or_valid = true;
- fw_vers->or_major =
- comb_verl >> NVM_COMB_VER_SHFT;
- fw_vers->or_build =
- (comb_verl << NVM_COMB_VER_SHFT)
- | (comb_verh >> NVM_COMB_VER_SHFT);
- fw_vers->or_patch =
- comb_verh & NVM_COMB_VER_MASK;
- }
- }
- break;
- default:
- return;
- }
- hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
- >> NVM_MAJOR_SHIFT;
-
- /* check for old style version format in newer images*/
- if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
- eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
- } else {
- eeprom_verl = (fw_version & NVM_MINOR_MASK)
- >> NVM_MINOR_SHIFT;
- }
- /* Convert minor value to hex before assigning to output struct
- * Val to be converted will not be higher than 99, per tool output
- */
- q = eeprom_verl / NVM_HEX_CONV;
- hval = q * NVM_HEX_TENS;
- rem = eeprom_verl % NVM_HEX_CONV;
- result = hval + rem;
- fw_vers->eep_minor = result;
-
-etrack_id:
- if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) {
- hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
- hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
- fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
- | eeprom_verl;
- }
- return;
-}
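
The validate/update pair removed above relies on a simple invariant: the checksum word stored at NVM_CHECKSUM_REG is chosen so that the 16-bit sum of words 0 through NVM_CHECKSUM_REG inclusive equals NVM_SUM. Below is a minimal standalone sketch of that arithmetic, assuming the words have already been read out of the NVM into a buffer; the helper name and buffer handling are illustrative, not part of the driver.

#include <stdint.h>

/* Illustrative only: compute the checksum word so that the 16-bit sum of
 * words [0, checksum_reg] equals the expected constant (NVM_SUM in the
 * driver), mirroring e1000_update_nvm_checksum_generic above.
 */
static uint16_t nvm_checksum_word(const uint16_t *words,
				  unsigned int checksum_reg,
				  uint16_t nvm_sum)
{
	uint16_t sum = 0;
	unsigned int i;

	for (i = 0; i < checksum_reg; i++)	/* words before the checksum */
		sum += words[i];

	return (uint16_t)(nvm_sum - sum);	/* value to store at checksum_reg */
}

/* Validation then reduces to checking that the sum of words
 * [0, checksum_reg] equals nvm_sum, which is exactly what
 * e1000_validate_nvm_checksum_generic does word by word.
 */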
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
deleted file mode 100644
index e27b1c0a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_NVM_H_
-#define _E1000_NVM_H_
-
-
-struct e1000_fw_version {
- u32 etrack_id;
- u16 eep_major;
- u16 eep_minor;
- u16 eep_build;
-
- u8 invm_major;
- u8 invm_minor;
- u8 invm_img_type;
-
- bool or_valid;
- u16 or_major;
- u16 or_build;
- u16 or_patch;
-};
-
-
-void e1000_init_nvm_ops_generic(struct e1000_hw *hw);
-s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
-void e1000_null_nvm_generic(struct e1000_hw *hw);
-s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data);
-s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
-s32 e1000_acquire_nvm_generic(struct e1000_hw *hw);
-
-s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg);
-s32 e1000_read_mac_addr_generic(struct e1000_hw *hw);
-s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
- u32 pba_num_size);
-s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size);
-s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
-s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data);
-s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data);
-s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw);
-s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words,
- u16 *data);
-s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw);
-void e1000_release_nvm_generic(struct e1000_hw *hw);
-void e1000_get_fw_version(struct e1000_hw *hw,
- struct e1000_fw_version *fw_vers);
-
-#define E1000_STM_OPCODE 0xDB00
-
-#endif
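
For reference, the version structure declared above is normally filled in one call to e1000_get_fw_version() and then formatted by the caller. The sketch below shows such a caller under the assumption that hw has already been initialised by the driver and that nvm.ops is functional; the helper name and the output format are hypothetical, not part of this header.

/* Illustrative caller sketch: fetch and format the firmware/EEPROM version. */
static void example_format_fw_version(struct e1000_hw *hw, char *buf, size_t len)
{
	struct e1000_fw_version fw;

	e1000_get_fw_version(hw, &fw);

	if (fw.or_valid)	/* Option ROM version present and valid */
		snprintf(buf, len, "%u.%u, 0x%08x, %u.%u.%u",
			 fw.eep_major, fw.eep_minor, fw.etrack_id,
			 fw.or_major, fw.or_build, fw.or_patch);
	else
		snprintf(buf, len, "%u.%u, 0x%08x",
			 fw.eep_major, fw.eep_minor, fw.etrack_id);
}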
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
deleted file mode 100644
index 3228100e..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-/* glue for the OS independent part of e1000
- * includes register access macros
- */
-
-#ifndef _E1000_OSDEP_H_
-#define _E1000_OSDEP_H_
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/sched.h>
-#include "kcompat.h"
-
-#ifndef __INTEL_COMPILER
-#pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-
-#define usec_delay(x) udelay(x)
-#define usec_delay_irq(x) udelay(x)
-#ifndef msec_delay
-#define msec_delay(x) do { \
- /* Don't mdelay in interrupt context! */ \
- if (in_interrupt()) \
- BUG(); \
- else \
- msleep(x); \
-} while (0)
-
-/* Some workarounds require millisecond delays and are run during interrupt
- * context. Most notably, when establishing link, the phy may need tweaking
- * but cannot process phy register reads/writes faster than millisecond
- * intervals...and we establish link due to a "link status change" interrupt.
- */
-#define msec_delay_irq(x) mdelay(x)
-#endif
-
-#define PCI_COMMAND_REGISTER PCI_COMMAND
-#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE
-#define ETH_ADDR_LEN ETH_ALEN
-
-#ifdef __BIG_ENDIAN
-#define E1000_BIG_ENDIAN __BIG_ENDIAN
-#endif
-
-
-#ifdef DEBUG
-#define DEBUGOUT(S) printk(KERN_DEBUG S)
-#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S, ## A)
-#else
-#define DEBUGOUT(S)
-#define DEBUGOUT1(S, A...)
-#endif
-
-#ifdef DEBUG_FUNC
-#define DEBUGFUNC(F) DEBUGOUT(F "\n")
-#else
-#define DEBUGFUNC(F)
-#endif
-#define DEBUGOUT2 DEBUGOUT1
-#define DEBUGOUT3 DEBUGOUT2
-#define DEBUGOUT7 DEBUGOUT3
-
-#define E1000_REGISTER(a, reg) reg
-
-#define E1000_WRITE_REG(a, reg, value) ( \
- writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg))))
-
-#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg)))
-
-#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
- writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))))
-
-#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
- readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))
-
-#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
-#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
-
-#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \
- writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))))
-
-#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \
- readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))
-
-#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \
- writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))))
-
-#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \
- readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))
-
-#define E1000_WRITE_REG_IO(a, reg, offset) do { \
- outl(reg, ((a)->io_base)); \
- outl(offset, ((a)->io_base + 4)); } while (0)
-
-#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS)
-
-#define E1000_WRITE_FLASH_REG(a, reg, value) ( \
- writel((value), ((a)->flash_address + reg)))
-
-#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \
- writew((value), ((a)->flash_address + reg)))
-
-#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg))
-
-#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg))
-
-#endif /* _E1000_OSDEP_H_ */
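
The register macros above are thin wrappers around readl()/writel() on the mapped BAR, and E1000_WRITE_FLUSH is simply a read of E1000_STATUS used to push posted writes out to the device. A minimal sketch of the resulting read-modify-write-and-flush idiom follows; the helper name and the choice of register bit are illustrative, and the same pattern appears in e1000_reload_nvm_generic earlier in this series.

/* Illustrative only: set a bit in CTRL_EXT and make sure the posted write
 * reaches the device before continuing, using the osdep.h accessors above.
 */
static void example_set_ctrl_ext_bit(struct e1000_hw *hw, u32 bit)
{
	u32 ctrl_ext;

	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);	/* read current value */
	ctrl_ext |= bit;				/* modify */
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);	/* write back */
	E1000_WRITE_FLUSH(hw);				/* flush the posted write */
}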
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
deleted file mode 100644
index 1934a309..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
+++ /dev/null
@@ -1,3392 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "e1000_api.h"
-
-static s32 e1000_wait_autoneg(struct e1000_hw *hw);
-/* Cable length tables */
-static const u16 e1000_m88_cable_length_table[] = {
- 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
-#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
- (sizeof(e1000_m88_cable_length_table) / \
- sizeof(e1000_m88_cable_length_table[0]))
-
-static const u16 e1000_igp_2_cable_length_table[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3,
- 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22,
- 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40,
- 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61,
- 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82,
- 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95,
- 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121,
- 124};
-#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
- (sizeof(e1000_igp_2_cable_length_table) / \
- sizeof(e1000_igp_2_cable_length_table[0]))
-
-/**
- * e1000_init_phy_ops_generic - Initialize PHY function pointers
- * @hw: pointer to the HW structure
- *
- * Sets up the function pointers to no-op functions
- **/
-void e1000_init_phy_ops_generic(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- DEBUGFUNC("e1000_init_phy_ops_generic");
-
- /* Initialize function pointers */
- phy->ops.init_params = e1000_null_ops_generic;
- phy->ops.acquire = e1000_null_ops_generic;
- phy->ops.check_polarity = e1000_null_ops_generic;
- phy->ops.check_reset_block = e1000_null_ops_generic;
- phy->ops.commit = e1000_null_ops_generic;
- phy->ops.force_speed_duplex = e1000_null_ops_generic;
- phy->ops.get_cfg_done = e1000_null_ops_generic;
- phy->ops.get_cable_length = e1000_null_ops_generic;
- phy->ops.get_info = e1000_null_ops_generic;
- phy->ops.set_page = e1000_null_set_page;
- phy->ops.read_reg = e1000_null_read_reg;
- phy->ops.read_reg_locked = e1000_null_read_reg;
- phy->ops.read_reg_page = e1000_null_read_reg;
- phy->ops.release = e1000_null_phy_generic;
- phy->ops.reset = e1000_null_ops_generic;
- phy->ops.set_d0_lplu_state = e1000_null_lplu_state;
- phy->ops.set_d3_lplu_state = e1000_null_lplu_state;
- phy->ops.write_reg = e1000_null_write_reg;
- phy->ops.write_reg_locked = e1000_null_write_reg;
- phy->ops.write_reg_page = e1000_null_write_reg;
- phy->ops.power_up = e1000_null_phy_generic;
- phy->ops.power_down = e1000_null_phy_generic;
- phy->ops.read_i2c_byte = e1000_read_i2c_byte_null;
- phy->ops.write_i2c_byte = e1000_write_i2c_byte_null;
-}
-
-/**
- * e1000_null_set_page - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw,
- u16 E1000_UNUSEDARG data)
-{
- DEBUGFUNC("e1000_null_set_page");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_read_reg - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw,
- u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data)
-{
- DEBUGFUNC("e1000_null_read_reg");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_phy_generic - No-op function, return void
- * @hw: pointer to the HW structure
- **/
-void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_null_phy_generic");
- return;
-}
-
-/**
- * e1000_null_lplu_state - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw,
- bool E1000_UNUSEDARG active)
-{
- DEBUGFUNC("e1000_null_lplu_state");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_null_write_reg - No-op function, return 0
- * @hw: pointer to the HW structure
- **/
-s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw,
- u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data)
-{
- DEBUGFUNC("e1000_null_write_reg");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_i2c_byte_null - No-op function, return 0
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @dev_addr: device address
- * @data: data value read
- *
- **/
-s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
- u8 E1000_UNUSEDARG byte_offset,
- u8 E1000_UNUSEDARG dev_addr,
- u8 E1000_UNUSEDARG *data)
-{
- DEBUGFUNC("e1000_read_i2c_byte_null");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_i2c_byte_null - No-op function, return 0
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @dev_addr: device address
- * @data: data value to write
- *
- **/
-s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
- u8 E1000_UNUSEDARG byte_offset,
- u8 E1000_UNUSEDARG dev_addr,
- u8 E1000_UNUSEDARG data)
-{
- DEBUGFUNC("e1000_write_i2c_byte_null");
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_check_reset_block_generic - Check if PHY reset is blocked
- * @hw: pointer to the HW structure
- *
- * Read the PHY management control register and check whether a PHY reset
- * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise
- * return E1000_BLK_PHY_RESET (12).
- **/
-s32 e1000_check_reset_block_generic(struct e1000_hw *hw)
-{
- u32 manc;
-
- DEBUGFUNC("e1000_check_reset_block");
-
- manc = E1000_READ_REG(hw, E1000_MANC);
-
- return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ?
- E1000_BLK_PHY_RESET : E1000_SUCCESS;
-}
-
-/**
- * e1000_get_phy_id - Retrieve the PHY ID and revision
- * @hw: pointer to the HW structure
- *
- * Reads the PHY registers and stores the PHY ID and possibly the PHY
- * revision in the hardware structure.
- **/
-s32 e1000_get_phy_id(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
- u16 phy_id;
-
- DEBUGFUNC("e1000_get_phy_id");
-
- if (!phy->ops.read_reg)
- return E1000_SUCCESS;
-
- ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
- if (ret_val)
- return ret_val;
-
- phy->id = (u32)(phy_id << 16);
- usec_delay(20);
- ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
- if (ret_val)
- return ret_val;
-
- phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
- phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
-
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_reset_dsp_generic - Reset PHY DSP
- * @hw: pointer to the HW structure
- *
- * Reset the digital signal processor.
- **/
-s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_phy_reset_dsp_generic");
-
- if (!hw->phy.ops.write_reg)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
- if (ret_val)
- return ret_val;
-
- return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
-}
-
-/**
- * e1000_read_phy_reg_mdic - Read MDI control register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the MDI control register in the PHY at offset and stores the
- * information read to data.
- **/
-s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
-
- DEBUGFUNC("e1000_read_phy_reg_mdic");
-
- if (offset > MAX_PHY_REG_ADDRESS) {
- DEBUGOUT1("PHY Address %d is out of range\n", offset);
- return -E1000_ERR_PARAM;
- }
-
- /* Set up Op-code, Phy Address, and register offset in the MDI
- * Control register. The MAC will take care of interfacing with the
- * PHY to retrieve the desired data.
- */
- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
-
- E1000_WRITE_REG(hw, E1000_MDIC, mdic);
-
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- usec_delay_irq(50);
- mdic = E1000_READ_REG(hw, E1000_MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- DEBUGOUT("MDI Read did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- DEBUGOUT("MDI Error\n");
- return -E1000_ERR_PHY;
- }
- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
- DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n",
- offset,
- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
- return -E1000_ERR_PHY;
- }
- *data = (u16) mdic;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_phy_reg_mdic - Write MDI control register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write to register at offset
- *
- * Writes data to MDI control register in the PHY at offset.
- **/
-s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
-{
- struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
-
- DEBUGFUNC("e1000_write_phy_reg_mdic");
-
- if (offset > MAX_PHY_REG_ADDRESS) {
- DEBUGOUT1("PHY Address %d is out of range\n", offset);
- return -E1000_ERR_PARAM;
- }
-
- /* Set up Op-code, Phy Address, and register offset in the MDI
- * Control register. The MAC will take care of interfacing with the
- * PHY to retrieve the desired data.
- */
- mdic = (((u32)data) |
- (offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
-
- E1000_WRITE_REG(hw, E1000_MDIC, mdic);
-
- /* Poll the ready bit to see if the MDI write completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- usec_delay_irq(50);
- mdic = E1000_READ_REG(hw, E1000_MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- DEBUGOUT("MDI Write did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- DEBUGOUT("MDI Error\n");
- return -E1000_ERR_PHY;
- }
- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
- DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n",
- offset,
- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
- return -E1000_ERR_PHY;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_phy_reg_i2c - Read PHY register using i2c
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the PHY register at offset using the i2c interface and stores the
- * retrieved information in data.
- **/
-s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- struct e1000_phy_info *phy = &hw->phy;
- u32 i, i2ccmd = 0;
-
- DEBUGFUNC("e1000_read_phy_reg_i2c");
-
- /* Set up Op-code, Phy Address, and register address in the I2CCMD
- * register. The MAC will take care of interfacing with the
- * PHY to retrieve the desired data.
- */
- i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
- (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
- (E1000_I2CCMD_OPCODE_READ));
-
- E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
-
- /* Poll the ready bit to see if the I2C read completed */
- for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
- usec_delay(50);
- i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
- if (i2ccmd & E1000_I2CCMD_READY)
- break;
- }
- if (!(i2ccmd & E1000_I2CCMD_READY)) {
- DEBUGOUT("I2CCMD Read did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (i2ccmd & E1000_I2CCMD_ERROR) {
- DEBUGOUT("I2CCMD Error bit set\n");
- return -E1000_ERR_PHY;
- }
-
- /* Need to byte-swap the 16-bit value. */
- *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_phy_reg_i2c - Write PHY register using i2c
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Writes the data to PHY register at the offset using the i2c interface.
- **/
-s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data)
-{
- struct e1000_phy_info *phy = &hw->phy;
- u32 i, i2ccmd = 0;
- u16 phy_data_swapped;
-
- DEBUGFUNC("e1000_write_phy_reg_i2c");
-
- /* Prevent overwriting SFP I2C EEPROM which is at A0 address. */
- if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) {
- DEBUGOUT1("PHY I2C Address %d is out of range.\n",
- hw->phy.addr);
- return -E1000_ERR_CONFIG;
- }
-
- /* Swap the data bytes for the I2C interface */
- phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00);
-
- /* Set up Op-code, Phy Address, and register address in the I2CCMD
- * register. The MAC will take care of interfacing with the
- * PHY to retrieve the desired data.
- */
- i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
- (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
- E1000_I2CCMD_OPCODE_WRITE |
- phy_data_swapped);
-
- E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
-
- /* Poll the ready bit to see if the I2C write completed */
- for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
- usec_delay(50);
- i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
- if (i2ccmd & E1000_I2CCMD_READY)
- break;
- }
- if (!(i2ccmd & E1000_I2CCMD_READY)) {
- DEBUGOUT("I2CCMD Write did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (i2ccmd & E1000_I2CCMD_ERROR) {
- DEBUGOUT("I2CCMD Error bit set\n");
- return -E1000_ERR_PHY;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_sfp_data_byte - Reads SFP module data.
- * @hw: pointer to the HW structure
- * @offset: byte location offset to be read
- * @data: read data buffer pointer
- *
- * Reads one byte of SFP module data stored
- * in the SFP's resident EEPROM memory or SFP diagnostic area.
- * Function should be called with
- * E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
- * E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
- * access
- **/
-s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data)
-{
- u32 i = 0;
- u32 i2ccmd = 0;
- u32 data_local = 0;
-
- DEBUGFUNC("e1000_read_sfp_data_byte");
-
- if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
- DEBUGOUT("I2CCMD command address exceeds upper limit\n");
- return -E1000_ERR_PHY;
- }
-
- /* Set up Op-code, EEPROM Address, in the I2CCMD
- * register. The MAC will take care of interfacing with the
- * EEPROM to retrieve the desired data.
- */
- i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
- E1000_I2CCMD_OPCODE_READ);
-
- E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
-
- /* Poll the ready bit to see if the I2C read completed */
- for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
- usec_delay(50);
- data_local = E1000_READ_REG(hw, E1000_I2CCMD);
- if (data_local & E1000_I2CCMD_READY)
- break;
- }
- if (!(data_local & E1000_I2CCMD_READY)) {
- DEBUGOUT("I2CCMD Read did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (data_local & E1000_I2CCMD_ERROR) {
- DEBUGOUT("I2CCMD Error bit set\n");
- return -E1000_ERR_PHY;
- }
- *data = (u8) data_local & 0xFF;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_sfp_data_byte - Writes SFP module data.
- * @hw: pointer to the HW structure
- * @offset: byte location offset to write to
- * @data: data to write
- *
- * Writes one byte to the SFP module data stored
- * in the SFP's resident EEPROM memory or SFP diagnostic area.
- * Function should be called with
- * E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
- * E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
- * access
- **/
-s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data)
-{
- u32 i = 0;
- u32 i2ccmd = 0;
- u32 data_local = 0;
-
- DEBUGFUNC("e1000_write_sfp_data_byte");
-
- if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
- DEBUGOUT("I2CCMD command address exceeds upper limit\n");
- return -E1000_ERR_PHY;
- }
- /* The programming interface is 16 bits wide
- * so we need to read the whole word first
- * then update appropriate byte lane and write
- * the updated word back.
- */
- /* Set up Op-code, EEPROM Address, in the I2CCMD
- * register. The MAC will take care of interfacing
- * with an EEPROM to write the data given.
- */
- i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
- E1000_I2CCMD_OPCODE_READ);
- /* Set a command to read single word */
- E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
- for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
- usec_delay(50);
- /* Poll the ready bit to see if the most recently
- * launched I2C operation completed
- */
- i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
- if (i2ccmd & E1000_I2CCMD_READY) {
- /* Check if this is READ or WRITE phase */
- if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) ==
- E1000_I2CCMD_OPCODE_READ) {
- /* Write the selected byte
- * lane and update whole word
- */
- data_local = i2ccmd & 0xFF00;
- data_local |= data;
- i2ccmd = ((offset <<
- E1000_I2CCMD_REG_ADDR_SHIFT) |
- E1000_I2CCMD_OPCODE_WRITE | data_local);
- E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
- } else {
- break;
- }
- }
- }
- if (!(i2ccmd & E1000_I2CCMD_READY)) {
- DEBUGOUT("I2CCMD Write did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (i2ccmd & E1000_I2CCMD_ERROR) {
- DEBUGOUT("I2CCMD Error bit set\n");
- return -E1000_ERR_PHY;
- }
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_phy_reg_m88 - Read m88 PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Acquires semaphore, if necessary, then reads the PHY register at offset
- * and stores the retrieved information in data. Release any acquired
- * semaphores before exiting.
- **/
-s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_read_phy_reg_m88");
-
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
- data);
-
- hw->phy.ops.release(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_write_phy_reg_m88 - Write m88 PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Acquires semaphore, if necessary, then writes the data to PHY register
- * at the offset. Release any acquired semaphores before exiting.
- **/
-s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data)
-{
- s32 ret_val;
-
- DEBUGFUNC("e1000_write_phy_reg_m88");
-
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
- data);
-
- hw->phy.ops.release(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_set_page_igp - Set page as on IGP-like PHY(s)
- * @hw: pointer to the HW structure
- * @page: page to set (shifted left when necessary)
- *
- * Sets PHY page required for PHY register access. Assumes semaphore is
- * already acquired. Note, this function sets phy.addr to 1 so the caller
- * must set it appropriately (if necessary) after this function returns.
- **/
-s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page)
-{
- DEBUGFUNC("e1000_set_page_igp");
-
- DEBUGOUT1("Setting page 0x%x\n", page);
-
- hw->phy.addr = 1;
-
- return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page);
-}
-
-/**
- * __e1000_read_phy_reg_igp - Read igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- * @locked: semaphore has already been acquired or not
- *
- * Acquires semaphore, if necessary, then reads the PHY register at offset
- * and stores the retrieved information in data. Release any acquired
- * semaphores before exiting.
- **/
-static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data,
- bool locked)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("__e1000_read_phy_reg_igp");
-
- if (!locked) {
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
- }
-
- if (offset > MAX_PHY_MULTI_PAGE_REG)
- ret_val = e1000_write_phy_reg_mdic(hw,
- IGP01E1000_PHY_PAGE_SELECT,
- (u16)offset);
- if (!ret_val)
- ret_val = e1000_read_phy_reg_mdic(hw,
- MAX_PHY_REG_ADDRESS & offset,
- data);
- if (!locked)
- hw->phy.ops.release(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_read_phy_reg_igp - Read igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Acquires semaphore then reads the PHY register at offset and stores the
- * retrieved information in data.
- * Release the acquired semaphore before exiting.
- **/
-s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- return __e1000_read_phy_reg_igp(hw, offset, data, false);
-}
-
-/**
- * e1000_read_phy_reg_igp_locked - Read igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the PHY register at offset and stores the retrieved information
- * in data. Assumes semaphore already acquired.
- **/
-s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- return __e1000_read_phy_reg_igp(hw, offset, data, true);
-}
-
-/**
- * __e1000_write_phy_reg_igp - Write igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- * @locked: semaphore has already been acquired or not
- *
- * Acquires semaphore, if necessary, then writes the data to PHY register
- * at the offset. Release any acquired semaphores before exiting.
- **/
-static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data,
- bool locked)
-{
- s32 ret_val = E1000_SUCCESS;
-
- DEBUGFUNC("e1000_write_phy_reg_igp");
-
- if (!locked) {
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
- }
-
- if (offset > MAX_PHY_MULTI_PAGE_REG)
- ret_val = e1000_write_phy_reg_mdic(hw,
- IGP01E1000_PHY_PAGE_SELECT,
- (u16)offset);
- if (!ret_val)
- ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS &
- offset,
- data);
- if (!locked)
- hw->phy.ops.release(hw);
-
- return ret_val;
-}
-
-/**
- * e1000_write_phy_reg_igp - Write igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Acquires semaphore then writes the data to PHY register
- * at the offset. Release any acquired semaphores before exiting.
- **/
-s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data)
-{
- return __e1000_write_phy_reg_igp(hw, offset, data, false);
-}
-
-/**
- * e1000_write_phy_reg_igp_locked - Write igp PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Writes the data to PHY register at the offset.
- * Assumes semaphore already acquired.
- **/
-s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data)
-{
- return __e1000_write_phy_reg_igp(hw, offset, data, true);
-}
-
-/**
- * __e1000_read_kmrn_reg - Read kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- * @locked: semaphore has already been acquired or not
- *
- * Acquires semaphore, if necessary. Then reads the PHY register at offset
- * using the kumeran interface. The information retrieved is stored in data.
- * Release any acquired semaphores before exiting.
- **/
-static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data,
- bool locked)
-{
- u32 kmrnctrlsta;
-
- DEBUGFUNC("__e1000_read_kmrn_reg");
-
- if (!locked) {
- s32 ret_val = E1000_SUCCESS;
-
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
- }
-
- kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
- E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
- E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
- E1000_WRITE_FLUSH(hw);
-
- usec_delay(2);
-
- kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA);
- *data = (u16)kmrnctrlsta;
-
- if (!locked)
- hw->phy.ops.release(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_read_kmrn_reg_generic - Read kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Acquires semaphore then reads the PHY register at offset using the
- * kumeran interface. The information retrieved is stored in data.
- * Release the acquired semaphore before exiting.
- **/
-s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- return __e1000_read_kmrn_reg(hw, offset, data, false);
-}
-
-/**
- * e1000_read_kmrn_reg_locked - Read kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to be read
- * @data: pointer to the read data
- *
- * Reads the PHY register at offset using the kumeran interface. The
- * information retrieved is stored in data.
- * Assumes semaphore already acquired.
- **/
-s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- return __e1000_read_kmrn_reg(hw, offset, data, true);
-}
-
-/**
- * __e1000_write_kmrn_reg - Write kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- * @locked: semaphore has already been acquired or not
- *
- * Acquires semaphore, if necessary. Then write the data to PHY register
- * at the offset using the kumeran interface. Release any acquired semaphores
- * before exiting.
- **/
-static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data,
- bool locked)
-{
- u32 kmrnctrlsta;
-
- DEBUGFUNC("e1000_write_kmrn_reg_generic");
-
- if (!locked) {
- s32 ret_val = E1000_SUCCESS;
-
- if (!hw->phy.ops.acquire)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
- }
-
- kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
- E1000_KMRNCTRLSTA_OFFSET) | data;
- E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
- E1000_WRITE_FLUSH(hw);
-
- usec_delay(2);
-
- if (!locked)
- hw->phy.ops.release(hw);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_kmrn_reg_generic - Write kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Acquires semaphore then writes the data to the PHY register at the offset
- * using the kumeran interface. Release the acquired semaphore before exiting.
- **/
-s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data)
-{
- return __e1000_write_kmrn_reg(hw, offset, data, false);
-}
-
-/**
- * e1000_write_kmrn_reg_locked - Write kumeran register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Write the data to PHY register at the offset using the kumeran interface.
- * Assumes semaphore already acquired.
- **/
-s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data)
-{
- return __e1000_write_kmrn_reg(hw, offset, data, true);
-}
-
-/**
- * e1000_set_master_slave_mode - Setup PHY for Master/slave mode
- * @hw: pointer to the HW structure
- *
- * Sets up Master/slave mode
- **/
-static s32 e1000_set_master_slave_mode(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 phy_data;
-
- /* Resolve Master/Slave mode */
- ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- /* load defaults for future use */
- hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ?
- ((phy_data & CR_1000T_MS_VALUE) ?
- e1000_ms_force_master :
- e1000_ms_force_slave) : e1000_ms_auto;
-
- switch (hw->phy.ms_type) {
- case e1000_ms_force_master:
- phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
- break;
- case e1000_ms_force_slave:
- phy_data |= CR_1000T_MS_ENABLE;
- phy_data &= ~(CR_1000T_MS_VALUE);
- break;
- case e1000_ms_auto:
- phy_data &= ~CR_1000T_MS_ENABLE;
- /* fall-through */
- default:
- break;
- }
-
- return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data);
-}
-
-/**
- * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link
- * @hw: pointer to the HW structure
- *
- * Sets up Carrier-sense on Transmit and downshift values.
- **/
-s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 phy_data;
-
- DEBUGFUNC("e1000_copper_link_setup_82577");
-
- if (hw->phy.reset_disable)
- return E1000_SUCCESS;
-
- if (hw->phy.type == e1000_phy_82580) {
- ret_val = hw->phy.ops.reset(hw);
- if (ret_val) {
- DEBUGOUT("Error resetting the PHY.\n");
- return ret_val;
- }
- }
-
- /* Enable CRS on Tx. This must be set for half-duplex operation. */
- ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data |= I82577_CFG_ASSERT_CRS_ON_TX;
-
- /* Enable downshift */
- phy_data |= I82577_CFG_ENABLE_DOWNSHIFT;
-
- ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data);
- if (ret_val)
- return ret_val;
-
- /* Set MDI/MDIX mode */
- ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data);
- if (ret_val)
- return ret_val;
- phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
- /* Options:
- * 0 - Auto (default)
- * 1 - MDI mode
- * 2 - MDI-X mode
- */
- switch (hw->phy.mdix) {
- case 1:
- break;
- case 2:
- phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
- break;
- case 0:
- default:
- phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
- break;
- }
- ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data);
- if (ret_val)
- return ret_val;
-
- return e1000_set_master_slave_mode(hw);
-}
-
-/**
- * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link
- * @hw: pointer to the HW structure
- *
- * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock
- * and downshift values are set also.
- **/
-s32 e1000_copper_link_setup_m88(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
-
- DEBUGFUNC("e1000_copper_link_setup_m88");
-
- if (phy->reset_disable)
- return E1000_SUCCESS;
-
- /* Enable CRS on Tx. This must be set for half-duplex operation. */
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
-
- /* Options:
- * MDI/MDI-X = 0 (default)
- * 0 - Auto for all speeds
- * 1 - MDI mode
- * 2 - MDI-X mode
- * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
- */
- phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
-
- switch (phy->mdix) {
- case 1:
- phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
- break;
- case 2:
- phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
- break;
- case 3:
- phy_data |= M88E1000_PSCR_AUTO_X_1000T;
- break;
- case 0:
- default:
- phy_data |= M88E1000_PSCR_AUTO_X_MODE;
- break;
- }
-
- /* Options:
- * disable_polarity_correction = 0 (default)
- * Automatic Correction for Reversed Cable Polarity
- * 0 - Disabled
- * 1 - Enabled
- */
- phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
- if (phy->disable_polarity_correction)
- phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
-
- ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
- if (ret_val)
- return ret_val;
-
- if (phy->revision < E1000_REVISION_4) {
- /* Force TX_CLK in the Extended PHY Specific Control Register
- * to 25MHz clock.
- */
- ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
- &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data |= M88E1000_EPSCR_TX_CLK_25;
-
- if ((phy->revision == E1000_REVISION_2) &&
- (phy->id == M88E1111_I_PHY_ID)) {
- /* 82573L PHY - set the downshift counter to 5x. */
- phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK;
- phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
- } else {
- /* Configure Master and Slave downshift values */
- phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
- M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
- phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
- M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
- }
- ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
- phy_data);
- if (ret_val)
- return ret_val;
- }
-
- /* Commit the changes. */
- ret_val = phy->ops.commit(hw);
- if (ret_val) {
- DEBUGOUT("Error committing the PHY changes\n");
- return ret_val;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link
- * @hw: pointer to the HW structure
- *
- * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's.
- * Also enables and sets the downshift parameters.
- **/
-s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
-
- DEBUGFUNC("e1000_copper_link_setup_m88_gen2");
-
- if (phy->reset_disable)
- return E1000_SUCCESS;
-
- /* Enable CRS on Tx. This must be set for half-duplex operation. */
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- /* Options:
- * MDI/MDI-X = 0 (default)
- * 0 - Auto for all speeds
- * 1 - MDI mode
- * 2 - MDI-X mode
- * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
- */
- phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
-
- switch (phy->mdix) {
- case 1:
- phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
- break;
- case 2:
- phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
- break;
- case 3:
- /* M88E1112 does not support this mode */
- if (phy->id != M88E1112_E_PHY_ID) {
- phy_data |= M88E1000_PSCR_AUTO_X_1000T;
- break;
- }
- case 0:
- default:
- phy_data |= M88E1000_PSCR_AUTO_X_MODE;
- break;
- }
-
- /* Options:
- * disable_polarity_correction = 0 (default)
- * Automatic Correction for Reversed Cable Polarity
- * 0 - Disabled
- * 1 - Enabled
- */
- phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
- if (phy->disable_polarity_correction)
- phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
-
- /* Enable downshift and set it to X6 */
- if (phy->id == M88E1543_E_PHY_ID) {
- phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE;
- ret_val =
- phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.commit(hw);
- if (ret_val) {
- DEBUGOUT("Error committing the PHY changes\n");
- return ret_val;
- }
- }
-
- phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK;
- phy_data |= I347AT4_PSCR_DOWNSHIFT_6X;
- phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE;
-
- ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
- if (ret_val)
- return ret_val;
-
- /* Commit the changes. */
- ret_val = phy->ops.commit(hw);
- if (ret_val) {
- DEBUGOUT("Error committing the PHY changes\n");
- return ret_val;
- }
-
- ret_val = e1000_set_master_slave_mode(hw);
- if (ret_val)
- return ret_val;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_copper_link_setup_igp - Setup igp PHY's for copper link
- * @hw: pointer to the HW structure
- *
- * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for
- * igp PHY's.
- **/
-s32 e1000_copper_link_setup_igp(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
-
- DEBUGFUNC("e1000_copper_link_setup_igp");
-
- if (phy->reset_disable)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.reset(hw);
- if (ret_val) {
- DEBUGOUT("Error resetting the PHY.\n");
- return ret_val;
- }
-
- /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid
- * timeout issues when LFS is enabled.
- */
- msec_delay(100);
-
- /* disable lplu d0 during driver init */
- if (hw->phy.ops.set_d0_lplu_state) {
- ret_val = hw->phy.ops.set_d0_lplu_state(hw, false);
- if (ret_val) {
- DEBUGOUT("Error Disabling LPLU D0\n");
- return ret_val;
- }
- }
- /* Configure mdi-mdix settings */
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data);
- if (ret_val)
- return ret_val;
-
- data &= ~IGP01E1000_PSCR_AUTO_MDIX;
-
- switch (phy->mdix) {
- case 1:
- data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
- break;
- case 2:
- data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
- break;
- case 0:
- default:
- data |= IGP01E1000_PSCR_AUTO_MDIX;
- break;
- }
- ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data);
- if (ret_val)
- return ret_val;
-
- /* set auto-master slave resolution settings */
- if (hw->mac.autoneg) {
- /* when autonegotiation advertisement is only 1000Mbps then we
- * should disable SmartSpeed and enable Auto MasterSlave
- * resolution as hardware default.
- */
- if (phy->autoneg_advertised == ADVERTISE_1000_FULL) {
- /* Disable SmartSpeed */
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- return ret_val;
-
- data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- return ret_val;
-
- /* Set auto Master/Slave resolution process */
- ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data);
- if (ret_val)
- return ret_val;
-
- data &= ~CR_1000T_MS_ENABLE;
- ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data);
- if (ret_val)
- return ret_val;
- }
-
- ret_val = e1000_set_master_slave_mode(hw);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation
- * @hw: pointer to the HW structure
- *
- * Reads the MII auto-neg advertisement register and/or the 1000T control
- * register and if the PHY is already setup for auto-negotiation, then
- * return successful. Otherwise, setup advertisement and flow control to
- * the appropriate values for the wanted auto-negotiation.
- **/
-static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 mii_autoneg_adv_reg;
- u16 mii_1000t_ctrl_reg = 0;
-
- DEBUGFUNC("e1000_phy_setup_autoneg");
-
- phy->autoneg_advertised &= phy->autoneg_mask;
-
- /* Read the MII Auto-Neg Advertisement Register (Address 4). */
- ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
- if (ret_val)
- return ret_val;
-
- if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
- /* Read the MII 1000Base-T Control Register (Address 9). */
- ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
- &mii_1000t_ctrl_reg);
- if (ret_val)
- return ret_val;
- }
-
- /* Need to parse both autoneg_advertised and fc and set up
- * the appropriate PHY registers. First we will parse for
- * autoneg_advertised software override. Since we can advertise
- * a plethora of combinations, we need to check each bit
- * individually.
- */
-
- /* First we clear all the 10/100 mb speed bits in the Auto-Neg
- * Advertisement Register (Address 4) and the 1000 mb speed bits in
- * the 1000Base-T Control Register (Address 9).
- */
- mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
- NWAY_AR_100TX_HD_CAPS |
- NWAY_AR_10T_FD_CAPS |
- NWAY_AR_10T_HD_CAPS);
- mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
-
- DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised);
-
- /* Do we want to advertise 10 Mb Half Duplex? */
- if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
- DEBUGOUT("Advertise 10mb Half duplex\n");
- mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
- }
-
- /* Do we want to advertise 10 Mb Full Duplex? */
- if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
- DEBUGOUT("Advertise 10mb Full duplex\n");
- mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
- }
-
- /* Do we want to advertise 100 Mb Half Duplex? */
- if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
- DEBUGOUT("Advertise 100mb Half duplex\n");
- mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
- }
-
- /* Do we want to advertise 100 Mb Full Duplex? */
- if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
- DEBUGOUT("Advertise 100mb Full duplex\n");
- mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
- }
-
- /* We do not allow the Phy to advertise 1000 Mb Half Duplex */
- if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
- DEBUGOUT("Advertise 1000mb Half duplex request denied!\n");
-
- /* Do we want to advertise 1000 Mb Full Duplex? */
- if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
- DEBUGOUT("Advertise 1000mb Full duplex\n");
- mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
- }
-
- /* Check for a software override of the flow control settings, and
- * setup the PHY advertisement registers accordingly. If
- * auto-negotiation is enabled, then software will have to set the
- * "PAUSE" bits to the correct value in the Auto-Negotiation
- * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
- * negotiation.
- *
- * The possible values of the "fc" parameter are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause frames
- * but not send pause frames).
- * 2: Tx flow control is enabled (we can send pause frames
- * but we do not support receiving pause frames).
- * 3: Both Rx and Tx flow control (symmetric) are enabled.
- * other: No software override. The flow control configuration
- * in the EEPROM is used.
- */
- switch (hw->fc.current_mode) {
- case e1000_fc_none:
- /* Flow control (Rx & Tx) is completely disabled by a
- * software over-ride.
- */
- mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
- break;
- case e1000_fc_rx_pause:
- /* Rx Flow control is enabled, and Tx Flow control is
- * disabled, by a software over-ride.
- *
- * Since there really isn't a way to advertise that we are
- * capable of Rx Pause ONLY, we will advertise that we
- * support both symmetric and asymmetric Rx PAUSE. Later
- * (in e1000_config_fc_after_link_up) we will disable the
- * hw's ability to send PAUSE frames.
- */
- mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
- break;
- case e1000_fc_tx_pause:
- /* Tx Flow control is enabled, and Rx Flow control is
- * disabled, by a software over-ride.
- */
- mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
- mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
- break;
- case e1000_fc_full:
- /* Flow control (both Rx and Tx) is enabled by a software
- * over-ride.
- */
- mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
- break;
- default:
- DEBUGOUT("Flow control param set incorrectly\n");
- return -E1000_ERR_CONFIG;
- }
-
- ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
- if (ret_val)
- return ret_val;
-
- DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
-
- if (phy->autoneg_mask & ADVERTISE_1000_FULL)
- ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
- mii_1000t_ctrl_reg);
-
- return ret_val;
-}
-
-/**
- * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link
- * @hw: pointer to the HW structure
- *
- * Performs initial bounds checking on the autoneg advertisement parameter,
- * then configures it to advertise the full capability. Sets up the PHY to
- * autoneg and restarts negotiation with the link partner. If
- * autoneg_wait_to_complete is set, waits for autoneg to complete before exiting.
- **/
-static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_ctrl;
-
- DEBUGFUNC("e1000_copper_link_autoneg");
-
- /* Perform some bounds checking on the autoneg advertisement
- * parameter.
- */
- phy->autoneg_advertised &= phy->autoneg_mask;
-
- /* If autoneg_advertised is zero, we assume it was not defaulted
- * by the calling code so we set to advertise full capability.
- */
- if (!phy->autoneg_advertised)
- phy->autoneg_advertised = phy->autoneg_mask;
-
- DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
- ret_val = e1000_phy_setup_autoneg(hw);
- if (ret_val) {
- DEBUGOUT("Error Setting up Auto-Negotiation\n");
- return ret_val;
- }
- DEBUGOUT("Restarting Auto-Neg\n");
-
- /* Restart auto-negotiation by setting the Auto Neg Enable bit and
- * the Auto Neg Restart bit in the PHY control register.
- */
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
- if (ret_val)
- return ret_val;
-
- phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
- if (ret_val)
- return ret_val;
-
- /* Does the user want to wait for Auto-Neg to complete here, or
- * check at a later time (for example, callback routine).
- */
- if (phy->autoneg_wait_to_complete) {
- ret_val = e1000_wait_autoneg(hw);
- if (ret_val) {
- DEBUGOUT("Error while waiting for autoneg to complete\n");
- return ret_val;
- }
- }
-
- hw->mac.get_link_status = true;
-
- return ret_val;
-}
-
-/**
- * e1000_setup_copper_link_generic - Configure copper link settings
- * @hw: pointer to the HW structure
- *
- * Calls the appropriate function to configure the link for auto-neg or forced
- * speed and duplex. Then we check for link; once link is established, the
- * collision distance and flow control configuration functions are called.
- * If link is not established, we return -E1000_ERR_PHY (-2).
- **/
-s32 e1000_setup_copper_link_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
- bool link;
-
- DEBUGFUNC("e1000_setup_copper_link_generic");
-
- if (hw->mac.autoneg) {
- /* Setup autoneg and flow control advertisement and perform
- * autonegotiation.
- */
- ret_val = e1000_copper_link_autoneg(hw);
- if (ret_val)
- return ret_val;
- } else {
- /* PHY will be set to 10H, 10F, 100H or 100F
- * depending on user settings.
- */
- DEBUGOUT("Forcing Speed and Duplex\n");
- ret_val = hw->phy.ops.force_speed_duplex(hw);
- if (ret_val) {
- DEBUGOUT("Error Forcing Speed and Duplex\n");
- return ret_val;
- }
- }
-
- /* Check link status. Wait up to 100 microseconds for link to become
- * valid.
- */
- ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10,
- &link);
- if (ret_val)
- return ret_val;
-
- if (link) {
- DEBUGOUT("Valid link established!!!\n");
- hw->mac.ops.config_collision_dist(hw);
- ret_val = e1000_config_fc_after_link_up_generic(hw);
- } else {
- DEBUGOUT("Unable to establish link!!!\n");
- }
-
- return ret_val;
-}
-
-/**
- * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY
- * @hw: pointer to the HW structure
- *
- * Calls the PHY setup function to force speed and duplex. Clears the
- * auto-crossover to force MDI manually. Waits for link and returns
- * successful if link up is successful, else -E1000_ERR_PHY (-2).
- **/
-s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
- bool link;
-
- DEBUGFUNC("e1000_phy_force_speed_duplex_igp");
-
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
- if (ret_val)
- return ret_val;
-
- e1000_phy_force_speed_duplex_setup(hw, &phy_data);
-
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
- if (ret_val)
- return ret_val;
-
- /* Clear Auto-Crossover to force MDI manually. IGP requires MDI
- * forced whenever speed and duplex are forced.
- */
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
- phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
-
- ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
- if (ret_val)
- return ret_val;
-
- DEBUGOUT1("IGP PSCR: %X\n", phy_data);
-
- usec_delay(1);
-
- if (phy->autoneg_wait_to_complete) {
- DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n");
-
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
-
- if (!link)
- DEBUGOUT("Link taking longer than expected.\n");
-
- /* Try once more */
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY
- * @hw: pointer to the HW structure
- *
- * Calls the PHY setup function to force speed and duplex. Clears the
- * auto-crossover to force MDI manually. Resets the PHY to commit the
- * changes. If time expires while waiting for link up, we reset the DSP.
- * After reset, TX_CLK and CRS on Tx must be set. Return successful upon
- * successful completion, else return corresponding error code.
- **/
-s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
- bool link;
-
- DEBUGFUNC("e1000_phy_force_speed_duplex_m88");
-
- /* I210 and I211 devices support Auto-Crossover in forced operation. */
- if (phy->type != e1000_phy_i210) {
- /* Clear Auto-Crossover to force MDI manually. M88E1000
- * requires MDI forced whenever speed and duplex are forced.
- */
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL,
- &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
- ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL,
- phy_data);
- if (ret_val)
- return ret_val;
- }
-
- DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data);
-
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
- if (ret_val)
- return ret_val;
-
- e1000_phy_force_speed_duplex_setup(hw, &phy_data);
-
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
- if (ret_val)
- return ret_val;
-
- /* Reset the phy to commit changes. */
- ret_val = hw->phy.ops.commit(hw);
- if (ret_val)
- return ret_val;
-
- if (phy->autoneg_wait_to_complete) {
- DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n");
-
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
-
- if (!link) {
- bool reset_dsp = true;
-
- switch (hw->phy.id) {
- case I347AT4_E_PHY_ID:
- case M88E1340M_E_PHY_ID:
- case M88E1112_E_PHY_ID:
- case M88E1543_E_PHY_ID:
- case I210_I_PHY_ID:
- reset_dsp = false;
- break;
- default:
- if (hw->phy.type != e1000_phy_m88)
- reset_dsp = false;
- break;
- }
-
- if (!reset_dsp) {
- DEBUGOUT("Link taking longer than expected.\n");
- } else {
- /* We didn't get link.
- * Reset the DSP and cross our fingers.
- */
- ret_val = phy->ops.write_reg(hw,
- M88E1000_PHY_PAGE_SELECT,
- 0x001d);
- if (ret_val)
- return ret_val;
- ret_val = e1000_phy_reset_dsp_generic(hw);
- if (ret_val)
- return ret_val;
- }
- }
-
- /* Try once more */
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
- }
-
- if (hw->phy.type != e1000_phy_m88)
- return E1000_SUCCESS;
-
-	if (hw->phy.id == I347AT4_E_PHY_ID ||
-	    hw->phy.id == M88E1340M_E_PHY_ID ||
-	    hw->phy.id == M88E1112_E_PHY_ID ||
-	    hw->phy.id == I210_I_PHY_ID ||
-	    hw->phy.id == M88E1543_E_PHY_ID)
-		return E1000_SUCCESS;
- ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- /* Resetting the phy means we need to re-force TX_CLK in the
- * Extended PHY Specific Control Register to 25MHz clock from
- * the reset value of 2.5MHz.
- */
- phy_data |= M88E1000_EPSCR_TX_CLK_25;
- ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
- if (ret_val)
- return ret_val;
-
- /* In addition, we must re-enable CRS on Tx for both half and full
- * duplex.
- */
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
- ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
-
- return ret_val;
-}
-
-/**
- * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex
- * @hw: pointer to the HW structure
- *
- * Forces the speed and duplex settings of the PHY.
- * This is a function pointer entry point only called by
- * PHY setup routines.
- **/
-s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
- bool link;
-
- DEBUGFUNC("e1000_phy_force_speed_duplex_ife");
-
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data);
- if (ret_val)
- return ret_val;
-
- e1000_phy_force_speed_duplex_setup(hw, &data);
-
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data);
- if (ret_val)
- return ret_val;
-
- /* Disable MDI-X support for 10/100 */
- ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
- if (ret_val)
- return ret_val;
-
- data &= ~IFE_PMC_AUTO_MDIX;
- data &= ~IFE_PMC_FORCE_MDIX;
-
- ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data);
- if (ret_val)
- return ret_val;
-
- DEBUGOUT1("IFE PMC: %X\n", data);
-
- usec_delay(1);
-
- if (phy->autoneg_wait_to_complete) {
- DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n");
-
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
-
- if (!link)
- DEBUGOUT("Link taking longer than expected.\n");
-
- /* Try once more */
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
- * @hw: pointer to the HW structure
- * @phy_ctrl: pointer to current value of PHY_CONTROL
- *
- * Forces speed and duplex on the PHY by doing the following: disable flow
- * control, force speed/duplex on the MAC, disable auto speed detection,
- * disable auto-negotiation, configure duplex, configure speed, configure
- * the collision distance, write configuration to CTRL register. The
- * caller must write to the PHY_CONTROL register for these settings to
- * take effect.
- **/
-void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl)
-{
- struct e1000_mac_info *mac = &hw->mac;
- u32 ctrl;
-
- DEBUGFUNC("e1000_phy_force_speed_duplex_setup");
-
- /* Turn off flow control when forcing speed/duplex */
- hw->fc.current_mode = e1000_fc_none;
-
- /* Force speed/duplex on the mac */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
- ctrl &= ~E1000_CTRL_SPD_SEL;
-
- /* Disable Auto Speed Detection */
- ctrl &= ~E1000_CTRL_ASDE;
-
- /* Disable autoneg on the phy */
- *phy_ctrl &= ~MII_CR_AUTO_NEG_EN;
-
- /* Forcing Full or Half Duplex? */
- if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) {
- ctrl &= ~E1000_CTRL_FD;
- *phy_ctrl &= ~MII_CR_FULL_DUPLEX;
- DEBUGOUT("Half Duplex\n");
- } else {
- ctrl |= E1000_CTRL_FD;
- *phy_ctrl |= MII_CR_FULL_DUPLEX;
- DEBUGOUT("Full Duplex\n");
- }
-
- /* Forcing 10mb or 100mb? */
- if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) {
- ctrl |= E1000_CTRL_SPD_100;
- *phy_ctrl |= MII_CR_SPEED_100;
- *phy_ctrl &= ~MII_CR_SPEED_1000;
- DEBUGOUT("Forcing 100mb\n");
- } else {
- ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
- *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
- DEBUGOUT("Forcing 10mb\n");
- }
-
- hw->mac.ops.config_collision_dist(hw);
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-}
-
-/**
- * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3
- * @hw: pointer to the HW structure
- * @active: boolean used to enable/disable lplu
- *
- * Success returns 0, Failure returns a negative error code.
- *
- * The low power link up (LPLU) state is set to the power management level D3
- * and SmartSpeed is disabled when active is true, else LPLU is cleared for D3
- * and SmartSpeed is enabled. LPLU and SmartSpeed are mutually exclusive. LPLU
- * is used during Dx states where power conservation is most important.
- * During driver activity, SmartSpeed should be enabled so performance is
- * maintained.
- **/
-s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
-
- DEBUGFUNC("e1000_set_d3_lplu_state_generic");
-
- if (!hw->phy.ops.read_reg)
- return E1000_SUCCESS;
-
- ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
- if (ret_val)
- return ret_val;
-
- if (!active) {
- data &= ~IGP02E1000_PM_D3_LPLU;
- ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
- data);
- if (ret_val)
- return ret_val;
- /* LPLU and SmartSpeed are mutually exclusive. LPLU is used
- * during Dx states where the power conservation is most
- * important. During driver activity we should enable
- * SmartSpeed, so performance is maintained.
- */
- if (phy->smart_speed == e1000_smart_speed_on) {
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- return ret_val;
-
- data |= IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- return ret_val;
- } else if (phy->smart_speed == e1000_smart_speed_off) {
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- return ret_val;
-
- data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
- if (ret_val)
- return ret_val;
- }
- } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
- (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
- (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
- data |= IGP02E1000_PM_D3_LPLU;
- ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
- data);
- if (ret_val)
- return ret_val;
-
- /* When LPLU is enabled, we should disable SmartSpeed */
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
- &data);
- if (ret_val)
- return ret_val;
-
- data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
- data);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_check_downshift_generic - Checks whether a downshift in speed occurred
- * @hw: pointer to the HW structure
- *
- * Success returns 0, Failure returns a negative error code.
- *
- * A downshift is detected by querying the PHY link health.
- **/
-s32 e1000_check_downshift_generic(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, offset, mask;
-
- DEBUGFUNC("e1000_check_downshift_generic");
-
- switch (phy->type) {
- case e1000_phy_i210:
- case e1000_phy_m88:
- case e1000_phy_gg82563:
- offset = M88E1000_PHY_SPEC_STATUS;
- mask = M88E1000_PSSR_DOWNSHIFT;
- break;
- case e1000_phy_igp_2:
- case e1000_phy_igp_3:
- offset = IGP01E1000_PHY_LINK_HEALTH;
- mask = IGP01E1000_PLHR_SS_DOWNGRADE;
- break;
- default:
- /* speed downshift not supported */
- phy->speed_downgraded = false;
- return E1000_SUCCESS;
- }
-
- ret_val = phy->ops.read_reg(hw, offset, &phy_data);
-
- if (!ret_val)
- phy->speed_downgraded = !!(phy_data & mask);
-
- return ret_val;
-}
-
-/**
- * e1000_check_polarity_m88 - Checks the polarity.
- * @hw: pointer to the HW structure
- *
- * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
- *
- * Polarity is determined based on the PHY specific status register.
- **/
-s32 e1000_check_polarity_m88(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
-
- DEBUGFUNC("e1000_check_polarity_m88");
-
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data);
-
- if (!ret_val)
- phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY)
- ? e1000_rev_polarity_reversed
- : e1000_rev_polarity_normal);
-
- return ret_val;
-}
-
-/**
- * e1000_check_polarity_igp - Checks the polarity.
- * @hw: pointer to the HW structure
- *
- * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
- *
- * Polarity is determined based on the PHY port status register, and the
- * current speed (since there is no polarity at 100Mbps).
- **/
-s32 e1000_check_polarity_igp(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data, offset, mask;
-
- DEBUGFUNC("e1000_check_polarity_igp");
-
- /* Polarity is determined based on the speed of
- * our connection.
- */
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
- IGP01E1000_PSSR_SPEED_1000MBPS) {
- offset = IGP01E1000_PHY_PCS_INIT_REG;
- mask = IGP01E1000_PHY_POLARITY_MASK;
- } else {
- /* This really only applies to 10Mbps since
- * there is no polarity for 100Mbps (always 0).
- */
- offset = IGP01E1000_PHY_PORT_STATUS;
- mask = IGP01E1000_PSSR_POLARITY_REVERSED;
- }
-
- ret_val = phy->ops.read_reg(hw, offset, &data);
-
- if (!ret_val)
- phy->cable_polarity = ((data & mask)
- ? e1000_rev_polarity_reversed
- : e1000_rev_polarity_normal);
-
- return ret_val;
-}
-
-/**
- * e1000_check_polarity_ife - Check cable polarity for IFE PHY
- * @hw: pointer to the HW structure
- *
- * Polarity is determined based on whether the polarity reversal feature is enabled.
- **/
-s32 e1000_check_polarity_ife(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, offset, mask;
-
- DEBUGFUNC("e1000_check_polarity_ife");
-
- /* Polarity is determined based on the reversal feature being enabled.
- */
- if (phy->polarity_correction) {
- offset = IFE_PHY_EXTENDED_STATUS_CONTROL;
- mask = IFE_PESC_POLARITY_REVERSED;
- } else {
- offset = IFE_PHY_SPECIAL_CONTROL;
- mask = IFE_PSC_FORCE_POLARITY;
- }
-
- ret_val = phy->ops.read_reg(hw, offset, &phy_data);
-
- if (!ret_val)
- phy->cable_polarity = ((phy_data & mask)
- ? e1000_rev_polarity_reversed
- : e1000_rev_polarity_normal);
-
- return ret_val;
-}
-
-/**
- * e1000_wait_autoneg - Wait for auto-neg completion
- * @hw: pointer to the HW structure
- *
- * Waits for auto-negotiation to complete or for the auto-negotiation time
- * limit to expire, whichever happens first.
- **/
-static s32 e1000_wait_autoneg(struct e1000_hw *hw)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 i, phy_status;
-
- DEBUGFUNC("e1000_wait_autoneg");
-
- if (!hw->phy.ops.read_reg)
- return E1000_SUCCESS;
-
- /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
- for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
- if (ret_val)
- break;
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
- if (ret_val)
- break;
- if (phy_status & MII_SR_AUTONEG_COMPLETE)
- break;
- msec_delay(100);
- }
-
- /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
- * has completed.
- */
- return ret_val;
-}
-
-/**
- * e1000_phy_has_link_generic - Polls PHY for link
- * @hw: pointer to the HW structure
- * @iterations: number of times to poll for link
- * @usec_interval: delay between polling attempts
- * @success: pointer to whether polling was successful or not
- *
- * Polls the PHY status register for link, 'iterations' number of times.
- **/
-s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
- u32 usec_interval, bool *success)
-{
- s32 ret_val = E1000_SUCCESS;
- u16 i, phy_status;
-
- DEBUGFUNC("e1000_phy_has_link_generic");
-
- if (!hw->phy.ops.read_reg)
- return E1000_SUCCESS;
-
- for (i = 0; i < iterations; i++) {
- /* Some PHYs require the PHY_STATUS register to be read
- * twice due to the link bit being sticky. No harm doing
- * it across the board.
- */
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
- if (ret_val)
- /* If the first read fails, another entity may have
- * ownership of the resources, wait and try again to
- * see if they have relinquished the resources yet.
- */
- usec_delay(usec_interval);
- ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
- if (ret_val)
- break;
- if (phy_status & MII_SR_LINK_STATUS)
- break;
- if (usec_interval >= 1000)
- msec_delay_irq(usec_interval/1000);
- else
- usec_delay(usec_interval);
- }
-
- *success = (i < iterations);
-
- return ret_val;
-}
-
-/**
- * e1000_get_cable_length_m88 - Determine cable length for m88 PHY
- * @hw: pointer to the HW structure
- *
- * Reads the PHY specific status register to retrieve the cable length
- * information. The cable length is determined by averaging the minimum and
- * maximum values to get the "average" cable length. The M88 PHY has five
- * possible cable length values, which are:
- * Register Value Cable Length
- * 0 < 50 meters
- * 1 50 - 80 meters
- * 2 80 - 110 meters
- * 3 110 - 140 meters
- * 4 > 140 meters
- **/
-s32 e1000_get_cable_length_m88(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, index;
-
- DEBUGFUNC("e1000_get_cable_length_m88");
-
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
- if (ret_val)
- return ret_val;
-
- index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT);
-
- if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
- return -E1000_ERR_PHY;
-
- phy->min_cable_length = e1000_m88_cable_length_table[index];
- phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
-
- phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
-
- return E1000_SUCCESS;
-}
-
-s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, phy_data2, is_cm;
- u16 index, default_page;
-
- DEBUGFUNC("e1000_get_cable_length_m88_gen2");
-
- switch (hw->phy.id) {
- case I210_I_PHY_ID:
- /* Get cable length from PHY Cable Diagnostics Control Reg */
- ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
- (I347AT4_PCDL + phy->addr),
- &phy_data);
- if (ret_val)
- return ret_val;
-
- /* Check if the unit of cable length is meters or cm */
- ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
- I347AT4_PCDC, &phy_data2);
- if (ret_val)
- return ret_val;
-
- is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
-
- /* Populate the phy structure with cable length in meters */
- phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
- phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
- phy->cable_length = phy_data / (is_cm ? 100 : 1);
- break;
- case M88E1543_E_PHY_ID:
- case M88E1340M_E_PHY_ID:
- case I347AT4_E_PHY_ID:
- /* Remember the original page select and set it to 7 */
- ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
- &default_page);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
- if (ret_val)
- return ret_val;
-
- /* Get cable length from PHY Cable Diagnostics Control Reg */
- ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr),
- &phy_data);
- if (ret_val)
- return ret_val;
-
- /* Check if the unit of cable length is meters or cm */
- ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
- if (ret_val)
- return ret_val;
-
- is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
-
- /* Populate the phy structure with cable length in meters */
- phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
- phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
- phy->cable_length = phy_data / (is_cm ? 100 : 1);
-
- /* Reset the page select to its original value */
- ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
- default_page);
- if (ret_val)
- return ret_val;
- break;
-
- case M88E1112_E_PHY_ID:
- /* Remember the original page select and set it to 5 */
- ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
- &default_page);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE,
- &phy_data);
- if (ret_val)
- return ret_val;
-
- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
-
- if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
- return -E1000_ERR_PHY;
-
- phy->min_cable_length = e1000_m88_cable_length_table[index];
- phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
-
- phy->cable_length = (phy->min_cable_length +
- phy->max_cable_length) / 2;
-
- /* Reset the page select to its original value */
- ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
- default_page);
- if (ret_val)
- return ret_val;
-
- break;
- default:
- return -E1000_ERR_PHY;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY
- * @hw: pointer to the HW structure
- *
- * The automatic gain control (agc) normalizes the amplitude of the
- * received signal, adjusting for the attenuation produced by the
- * cable. By reading the AGC registers, which represent the
- * combination of coarse and fine gain values, the value can be put
- * into a lookup table to obtain the approximate cable length
- * for each channel.
- **/
-s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, i, agc_value = 0;
- u16 cur_agc_index, max_agc_index = 0;
- u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1;
- static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = {
- IGP02E1000_PHY_AGC_A,
- IGP02E1000_PHY_AGC_B,
- IGP02E1000_PHY_AGC_C,
- IGP02E1000_PHY_AGC_D
- };
-
- DEBUGFUNC("e1000_get_cable_length_igp_2");
-
- /* Read the AGC registers for all channels */
- for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
- ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data);
- if (ret_val)
- return ret_val;
-
- /* Getting bits 15:9, which represent the combination of
- * coarse and fine gain values. The result is a number
- * that can be put into the lookup table to obtain the
- * approximate cable length.
- */
- cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
- IGP02E1000_AGC_LENGTH_MASK);
-
- /* Array index bound check. */
- if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) ||
- (cur_agc_index == 0))
- return -E1000_ERR_PHY;
-
- /* Remove min & max AGC values from calculation. */
- if (e1000_igp_2_cable_length_table[min_agc_index] >
- e1000_igp_2_cable_length_table[cur_agc_index])
- min_agc_index = cur_agc_index;
- if (e1000_igp_2_cable_length_table[max_agc_index] <
- e1000_igp_2_cable_length_table[cur_agc_index])
- max_agc_index = cur_agc_index;
-
- agc_value += e1000_igp_2_cable_length_table[cur_agc_index];
- }
-
- agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] +
- e1000_igp_2_cable_length_table[max_agc_index]);
- agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
-
- /* Calculate cable length with the error range of +/- 10 meters. */
- phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
- (agc_value - IGP02E1000_AGC_RANGE) : 0);
- phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE;
-
- phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_phy_info_m88 - Retrieve PHY information
- * @hw: pointer to the HW structure
- *
- * Valid for only copper links. Read the PHY status register (sticky read)
- * to verify that link is up. Read the PHY special control register to
- * determine the polarity and 10base-T extended distance. Read the PHY
- * special status register to determine MDI/MDIx and current speed. If
- * speed is 1000, then determine cable length, local and remote receiver.
- **/
-s32 e1000_get_phy_info_m88(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
- bool link;
-
- DEBUGFUNC("e1000_get_phy_info_m88");
-
- if (phy->media_type != e1000_media_type_copper) {
- DEBUGOUT("Phy info is only valid for copper media\n");
- return -E1000_ERR_CONFIG;
- }
-
- ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link) {
- DEBUGOUT("Phy info is only valid if link is up\n");
- return -E1000_ERR_CONFIG;
- }
-
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy->polarity_correction = !!(phy_data &
- M88E1000_PSCR_POLARITY_REVERSAL);
-
- ret_val = e1000_check_polarity_m88(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX);
-
- if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
- ret_val = hw->phy.ops.get_cable_length(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data);
- if (ret_val)
- return ret_val;
-
- phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
-
- phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
- } else {
- /* Set values to "undefined" */
- phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
- phy->local_rx = e1000_1000t_rx_status_undefined;
- phy->remote_rx = e1000_1000t_rx_status_undefined;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_get_phy_info_igp - Retrieve igp PHY information
- * @hw: pointer to the HW structure
- *
- * Read PHY status to determine if link is up. If link is up, then
- * set/determine 10base-T extended distance and polarity correction. Read
- * PHY port status to determine MDI/MDIx and speed. Based on the speed,
- * determine the cable length, local and remote receiver.
- **/
-s32 e1000_get_phy_info_igp(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
- bool link;
-
- DEBUGFUNC("e1000_get_phy_info_igp");
-
- ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link) {
- DEBUGOUT("Phy info is only valid if link is up\n");
- return -E1000_ERR_CONFIG;
- }
-
- phy->polarity_correction = true;
-
- ret_val = e1000_check_polarity_igp(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX);
-
- if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
- IGP01E1000_PSSR_SPEED_1000MBPS) {
- ret_val = phy->ops.get_cable_length(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
-
- phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
- } else {
- phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
- phy->local_rx = e1000_1000t_rx_status_undefined;
- phy->remote_rx = e1000_1000t_rx_status_undefined;
- }
-
- return ret_val;
-}
-
-/**
- * e1000_get_phy_info_ife - Retrieves various IFE PHY states
- * @hw: pointer to the HW structure
- *
- * Populates "phy" structure with various feature states.
- **/
-s32 e1000_get_phy_info_ife(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
- bool link;
-
- DEBUGFUNC("e1000_get_phy_info_ife");
-
- ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link) {
- DEBUGOUT("Phy info is only valid if link is up\n");
- return -E1000_ERR_CONFIG;
- }
-
- ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data);
- if (ret_val)
- return ret_val;
- phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE);
-
- if (phy->polarity_correction) {
- ret_val = e1000_check_polarity_ife(hw);
- if (ret_val)
- return ret_val;
- } else {
- /* Polarity is forced */
- phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY)
- ? e1000_rev_polarity_reversed
- : e1000_rev_polarity_normal);
- }
-
- ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
- if (ret_val)
- return ret_val;
-
- phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS);
-
- /* The following parameters are undefined for 10/100 operation. */
- phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
- phy->local_rx = e1000_1000t_rx_status_undefined;
- phy->remote_rx = e1000_1000t_rx_status_undefined;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_sw_reset_generic - PHY software reset
- * @hw: pointer to the HW structure
- *
- * Does a software reset of the PHY by reading the PHY control register,
- * setting the reset bit, and writing the register back to the PHY.
- **/
-s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw)
-{
- s32 ret_val;
- u16 phy_ctrl;
-
- DEBUGFUNC("e1000_phy_sw_reset_generic");
-
- if (!hw->phy.ops.read_reg)
- return E1000_SUCCESS;
-
- ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
- if (ret_val)
- return ret_val;
-
- phy_ctrl |= MII_CR_RESET;
- ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
- if (ret_val)
- return ret_val;
-
- usec_delay(1);
-
- return ret_val;
-}
-
-/**
- * e1000_phy_hw_reset_generic - PHY hardware reset
- * @hw: pointer to the HW structure
- *
- * Verify the reset block is not blocking us from resetting. Acquire the
- * semaphore (if necessary), set the PHY reset bit in the device control
- * register, wait the appropriate delay time for the device to reset, and
- * release the semaphore (if necessary).
- **/
-s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u32 ctrl;
-
- DEBUGFUNC("e1000_phy_hw_reset_generic");
-
- if (phy->ops.check_reset_block) {
- ret_val = phy->ops.check_reset_block(hw);
- if (ret_val)
- return E1000_SUCCESS;
- }
-
- ret_val = phy->ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST);
- E1000_WRITE_FLUSH(hw);
-
- usec_delay(phy->reset_delay_us);
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- E1000_WRITE_FLUSH(hw);
-
- usec_delay(150);
-
- phy->ops.release(hw);
-
- return phy->ops.get_cfg_done(hw);
-}
-
-/**
- * e1000_get_cfg_done_generic - Generic configuration done
- * @hw: pointer to the HW structure
- *
- * Generic function to wait 10 milliseconds for configuration to complete
- * and return success.
- **/
-s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw)
-{
- DEBUGFUNC("e1000_get_cfg_done_generic");
-
- msec_delay_irq(10);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_phy_init_script_igp3 - Inits the IGP3 PHY
- * @hw: pointer to the HW structure
- *
- * Initializes an Intel Gigabit PHY3 when an EEPROM is not present.
- **/
-s32 e1000_phy_init_script_igp3(struct e1000_hw *hw)
-{
- DEBUGOUT("Running IGP 3 PHY init script\n");
-
- /* PHY init IGP 3 */
- /* Enable rise/fall, 10-mode work in class-A */
- hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018);
- /* Remove all caps from Replica path filter */
- hw->phy.ops.write_reg(hw, 0x2F52, 0x0000);
- /* Bias trimming for ADC, AFE and Driver (Default) */
- hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24);
- /* Increase Hybrid poly bias */
- hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0);
- /* Add 4% to Tx amplitude in Gig mode */
- hw->phy.ops.write_reg(hw, 0x2010, 0x10B0);
- /* Disable trimming (TTT) */
- hw->phy.ops.write_reg(hw, 0x2011, 0x0000);
- /* Poly DC correction to 94.6% + 2% for all channels */
- hw->phy.ops.write_reg(hw, 0x20DD, 0x249A);
- /* ABS DC correction to 95.9% */
- hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3);
- /* BG temp curve trim */
- hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE);
- /* Increasing ADC OPAMP stage 1 currents to max */
- hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4);
-	/* Force 1000 (required for enabling PHY regs configuration) */
- hw->phy.ops.write_reg(hw, 0x0000, 0x0140);
- /* Set upd_freq to 6 */
- hw->phy.ops.write_reg(hw, 0x1F30, 0x1606);
- /* Disable NPDFE */
- hw->phy.ops.write_reg(hw, 0x1F31, 0xB814);
- /* Disable adaptive fixed FFE (Default) */
- hw->phy.ops.write_reg(hw, 0x1F35, 0x002A);
- /* Enable FFE hysteresis */
- hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067);
- /* Fixed FFE for short cable lengths */
- hw->phy.ops.write_reg(hw, 0x1F54, 0x0065);
- /* Fixed FFE for medium cable lengths */
- hw->phy.ops.write_reg(hw, 0x1F55, 0x002A);
- /* Fixed FFE for long cable lengths */
- hw->phy.ops.write_reg(hw, 0x1F56, 0x002A);
- /* Enable Adaptive Clip Threshold */
- hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0);
- /* AHT reset limit to 1 */
- hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF);
- /* Set AHT master delay to 127 msec */
- hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC);
- /* Set scan bits for AHT */
- hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF);
- /* Set AHT Preset bits */
- hw->phy.ops.write_reg(hw, 0x1F79, 0x0210);
- /* Change integ_factor of channel A to 3 */
- hw->phy.ops.write_reg(hw, 0x1895, 0x0003);
- /* Change prop_factor of channels BCD to 8 */
- hw->phy.ops.write_reg(hw, 0x1796, 0x0008);
- /* Change cg_icount + enable integbp for channels BCD */
- hw->phy.ops.write_reg(hw, 0x1798, 0xD008);
- /* Change cg_icount + enable integbp + change prop_factor_master
- * to 8 for channel A
- */
- hw->phy.ops.write_reg(hw, 0x1898, 0xD918);
- /* Disable AHT in Slave mode on channel A */
- hw->phy.ops.write_reg(hw, 0x187A, 0x0800);
- /* Enable LPLU and disable AN to 1000 in non-D0a states,
- * Enable SPD+B2B
- */
- hw->phy.ops.write_reg(hw, 0x0019, 0x008D);
- /* Enable restart AN on an1000_dis change */
- hw->phy.ops.write_reg(hw, 0x001B, 0x2080);
- /* Enable wh_fifo read clock in 10/100 modes */
- hw->phy.ops.write_reg(hw, 0x0014, 0x0045);
- /* Restart AN, Speed selection is 1000 */
- hw->phy.ops.write_reg(hw, 0x0000, 0x1340);
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_phy_type_from_id - Get PHY type from id
- * @phy_id: phy_id read from the phy
- *
- * Returns the phy type from the id.
- **/
-enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id)
-{
- enum e1000_phy_type phy_type = e1000_phy_unknown;
-
- switch (phy_id) {
- case M88E1000_I_PHY_ID:
- case M88E1000_E_PHY_ID:
- case M88E1111_I_PHY_ID:
- case M88E1011_I_PHY_ID:
- case M88E1543_E_PHY_ID:
- case I347AT4_E_PHY_ID:
- case M88E1112_E_PHY_ID:
- case M88E1340M_E_PHY_ID:
- phy_type = e1000_phy_m88;
- break;
- case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */
- phy_type = e1000_phy_igp_2;
- break;
- case GG82563_E_PHY_ID:
- phy_type = e1000_phy_gg82563;
- break;
- case IGP03E1000_E_PHY_ID:
- phy_type = e1000_phy_igp_3;
- break;
- case IFE_E_PHY_ID:
- case IFE_PLUS_E_PHY_ID:
- case IFE_C_E_PHY_ID:
- phy_type = e1000_phy_ife;
- break;
- case I82580_I_PHY_ID:
- phy_type = e1000_phy_82580;
- break;
- case I210_I_PHY_ID:
- phy_type = e1000_phy_i210;
- break;
- default:
- phy_type = e1000_phy_unknown;
- break;
- }
- return phy_type;
-}
-
-/**
- * e1000_determine_phy_address - Determines PHY address.
- * @hw: pointer to the HW structure
- *
- * This uses a trial and error method to loop through possible PHY
- * addresses. It tests each by reading the PHY ID registers and
- * checking for a match.
- **/
-s32 e1000_determine_phy_address(struct e1000_hw *hw)
-{
- u32 phy_addr = 0;
- u32 i;
- enum e1000_phy_type phy_type = e1000_phy_unknown;
-
- hw->phy.id = phy_type;
-
- for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) {
- hw->phy.addr = phy_addr;
- i = 0;
-
- do {
- e1000_get_phy_id(hw);
- phy_type = e1000_get_phy_type_from_id(hw->phy.id);
-
- /* If phy_type is valid, break - we found our
- * PHY address
- */
- if (phy_type != e1000_phy_unknown)
- return E1000_SUCCESS;
-
- msec_delay(1);
- i++;
- } while (i < 10);
- }
-
- return -E1000_ERR_PHY_TYPE;
-}
-
-/**
- * e1000_power_up_phy_copper - Restore copper link in case of PHY power down
- * @hw: pointer to the HW structure
- *
- * Restores the link to its previous settings after the PHY was powered down
- * to save power, to turn off link during a driver unload, or because wake on
- * lan was not enabled.
- **/
-void e1000_power_up_phy_copper(struct e1000_hw *hw)
-{
- u16 mii_reg = 0;
- u16 power_reg = 0;
-
- /* The PHY will retain its settings across a power down/up cycle */
- hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
- mii_reg &= ~MII_CR_POWER_DOWN;
- if (hw->phy.type == e1000_phy_i210) {
- hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
- power_reg &= ~GS40G_CS_POWER_DOWN;
- hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
- }
- hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
-}
-
-/**
- * e1000_power_down_phy_copper - Power down copper PHY
- * @hw: pointer to the HW structure
- *
- * Powers down the PHY to save power when the interface is down, during a
- * driver unload, or when wake on lan is not enabled.
- **/
-void e1000_power_down_phy_copper(struct e1000_hw *hw)
-{
- u16 mii_reg = 0;
- u16 power_reg = 0;
-
- /* The PHY will retain its settings across a power down/up cycle */
- hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
- mii_reg |= MII_CR_POWER_DOWN;
- /* i210 Phy requires an additional bit for power up/down */
- if (hw->phy.type == e1000_phy_i210) {
- hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
- power_reg |= GS40G_CS_POWER_DOWN;
- hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
- }
- hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
- msec_delay(1);
-}
-
-/**
- * e1000_check_polarity_82577 - Checks the polarity.
- * @hw: pointer to the HW structure
- *
- * Success returns 0, Failure returns -E1000_ERR_PHY (-2)
- *
- * Polarity is determined based on the PHY specific status register.
- **/
-s32 e1000_check_polarity_82577(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
-
- DEBUGFUNC("e1000_check_polarity_82577");
-
- ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
-
- if (!ret_val)
- phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY)
- ? e1000_rev_polarity_reversed
- : e1000_rev_polarity_normal);
-
- return ret_val;
-}
-
-/**
- * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY
- * @hw: pointer to the HW structure
- *
- * Calls the PHY setup function to force speed and duplex.
- **/
-s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data;
- bool link;
-
- DEBUGFUNC("e1000_phy_force_speed_duplex_82577");
-
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
- if (ret_val)
- return ret_val;
-
- e1000_phy_force_speed_duplex_setup(hw, &phy_data);
-
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
- if (ret_val)
- return ret_val;
-
- usec_delay(1);
-
- if (phy->autoneg_wait_to_complete) {
- DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n");
-
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- if (ret_val)
- return ret_val;
-
- if (!link)
- DEBUGOUT("Link taking longer than expected.\n");
-
- /* Try once more */
- ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
- 100000, &link);
- }
-
- return ret_val;
-}
-
-/**
- * e1000_get_phy_info_82577 - Retrieve I82577 PHY information
- * @hw: pointer to the HW structure
- *
- * Read PHY status to determine if link is up. If link is up, then
- * set/determine 10base-T extended distance and polarity correction. Read
- * PHY port status to determine MDI/MDIx and speed. Based on the speed,
- * determine the cable length, local and remote receiver.
- **/
-s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 data;
- bool link;
-
- DEBUGFUNC("e1000_get_phy_info_82577");
-
- ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
- if (ret_val)
- return ret_val;
-
- if (!link) {
- DEBUGOUT("Phy info is only valid if link is up\n");
- return -E1000_ERR_CONFIG;
- }
-
- phy->polarity_correction = true;
-
- ret_val = e1000_check_polarity_82577(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
- if (ret_val)
- return ret_val;
-
- phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX);
-
- if ((data & I82577_PHY_STATUS2_SPEED_MASK) ==
- I82577_PHY_STATUS2_SPEED_1000MBPS) {
- ret_val = hw->phy.ops.get_cable_length(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
- if (ret_val)
- return ret_val;
-
- phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
-
- phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
- ? e1000_1000t_rx_status_ok
- : e1000_1000t_rx_status_not_ok;
- } else {
- phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
- phy->local_rx = e1000_1000t_rx_status_undefined;
- phy->remote_rx = e1000_1000t_rx_status_undefined;
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY
- * @hw: pointer to the HW structure
- *
- * Reads the diagnostic status register and verifies the result is valid
- * before placing it in the cable_length field of the PHY structure.
- **/
-s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
-{
- struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val;
- u16 phy_data, length;
-
- DEBUGFUNC("e1000_get_cable_length_82577");
-
- ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data);
- if (ret_val)
- return ret_val;
-
- length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
- I82577_DSTATUS_CABLE_LENGTH_SHIFT);
-
- if (length == E1000_CABLE_LENGTH_UNDEFINED)
- return -E1000_ERR_PHY;
-
- phy->cable_length = length;
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_phy_reg_gs40g - Write GS40G PHY register
- * @hw: pointer to the HW structure
- * @offset: register offset to write to
- * @data: data to write at register offset
- *
- * Acquires the semaphore, if necessary, then writes the data to the PHY
- * register at the offset. Releases any acquired semaphores before exiting.
- **/
-s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data)
-{
- s32 ret_val;
- u16 page = offset >> GS40G_PAGE_SHIFT;
-
- DEBUGFUNC("e1000_write_phy_reg_gs40g");
-
- offset = offset & GS40G_OFFSET_MASK;
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
- if (ret_val)
- goto release;
- ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
-
-release:
- hw->phy.ops.release(hw);
- return ret_val;
-}
-
-/**
- * e1000_read_phy_reg_gs40g - Read GS40G PHY register
- * @hw: pointer to the HW structure
- * @offset: lower half is the register offset to read from,
- *          upper half is the page to use
- * @data: pointer to storage for the data read at the register offset
- *
- * Acquires the semaphore, if necessary, then reads the data from the PHY
- * register at the offset. Releases any acquired semaphores before exiting.
- **/
-s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data)
-{
- s32 ret_val;
- u16 page = offset >> GS40G_PAGE_SHIFT;
-
- DEBUGFUNC("e1000_read_phy_reg_gs40g");
-
- offset = offset & GS40G_OFFSET_MASK;
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
-
- ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
- if (ret_val)
- goto release;
- ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
-
-release:
- hw->phy.ops.release(hw);
- return ret_val;
-}
-
-/**
- * e1000_read_phy_reg_mphy - Read mPHY control register
- * @hw: pointer to the HW structure
- * @address: address to be read
- * @data: pointer to the read data
- *
- * Reads the mPHY register at the given address and stores the value read
- * in data.
- **/
-s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
-{
- u32 mphy_ctrl = 0;
- bool locked = false;
- bool ready = false;
-
- DEBUGFUNC("e1000_read_phy_reg_mphy");
-
-	/* Check if mPHY is ready for read/write operations */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
-
- /* Check if mPHY access is disabled and enable it if so */
- mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
- if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
- locked = true;
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
- }
-
- /* Set the address that we want to read */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
-
-	/* We mask the address because we want to use only the current lane */
- mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK &
- ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) |
- (address & E1000_MPHY_ADDRESS_MASK);
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
-
- /* Read data from the address */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- *data = E1000_READ_REG(hw, E1000_MPHY_DATA);
-
- /* Disable access to mPHY if it was originally disabled */
- if (locked) {
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
- E1000_MPHY_DIS_ACCESS);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_write_phy_reg_mphy - Write mPHY control register
- * @hw: pointer to the HW structure
- * @address: address to write to
- * @data: data to write to register at offset
- * @line_override: used when we want to use a different line than the
- *                 default one
- *
- * Writes data to the mPHY register at the given address.
- **/
-s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
- bool line_override)
-{
- u32 mphy_ctrl = 0;
- bool locked = false;
- bool ready = false;
-
- DEBUGFUNC("e1000_write_phy_reg_mphy");
-
-	/* Check if mPHY is ready for read/write operations */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
-
- /* Check if mPHY access is disabled and enable it if so */
- mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
- if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
- locked = true;
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
- }
-
-	/* Set the address that we want to write */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
-
-	/* We mask the address because we want to use only the current lane */
- if (line_override)
- mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE;
- else
- mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE;
- mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) |
- (address & E1000_MPHY_ADDRESS_MASK);
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
-
-	/* Write data to the address */
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
-
- /* Disable access to mPHY if it was originally disabled */
- if (locked) {
- ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
- E1000_MPHY_DIS_ACCESS);
- }
-
- return E1000_SUCCESS;
-}
-
-/**
- * e1000_is_mphy_ready - Check if mPHY control register is not busy
- * @hw: pointer to the HW structure
- *
- * Returns true when the mPHY control register is not busy, false otherwise.
- **/
-bool e1000_is_mphy_ready(struct e1000_hw *hw)
-{
- u16 retry_count = 0;
- u32 mphy_ctrl = 0;
- bool ready = false;
-
- while (retry_count < 2) {
- mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
- if (mphy_ctrl & E1000_MPHY_BUSY) {
- usec_delay(20);
- retry_count++;
- continue;
- }
- ready = true;
- break;
- }
-
- if (!ready)
- DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n");
-
- return ready;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
deleted file mode 100644
index 67e9ba77..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_PHY_H_
-#define _E1000_PHY_H_
-
-void e1000_init_phy_ops_generic(struct e1000_hw *hw);
-s32 e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data);
-void e1000_null_phy_generic(struct e1000_hw *hw);
-s32 e1000_null_lplu_state(struct e1000_hw *hw, bool active);
-s32 e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_null_set_page(struct e1000_hw *hw, u16 data);
-s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data);
-s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data);
-s32 e1000_check_downshift_generic(struct e1000_hw *hw);
-s32 e1000_check_polarity_m88(struct e1000_hw *hw);
-s32 e1000_check_polarity_igp(struct e1000_hw *hw);
-s32 e1000_check_polarity_ife(struct e1000_hw *hw);
-s32 e1000_check_reset_block_generic(struct e1000_hw *hw);
-s32 e1000_copper_link_setup_igp(struct e1000_hw *hw);
-s32 e1000_copper_link_setup_m88(struct e1000_hw *hw);
-s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw);
-s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw);
-s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw);
-s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw);
-s32 e1000_get_cable_length_m88(struct e1000_hw *hw);
-s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw);
-s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw);
-s32 e1000_get_cfg_done_generic(struct e1000_hw *hw);
-s32 e1000_get_phy_id(struct e1000_hw *hw);
-s32 e1000_get_phy_info_igp(struct e1000_hw *hw);
-s32 e1000_get_phy_info_m88(struct e1000_hw *hw);
-s32 e1000_get_phy_info_ife(struct e1000_hw *hw);
-s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw);
-void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl);
-s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw);
-s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw);
-s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page);
-s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active);
-s32 e1000_setup_copper_link_generic(struct e1000_hw *hw);
-s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
- u32 usec_interval, bool *success);
-s32 e1000_phy_init_script_igp3(struct e1000_hw *hw);
-enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id);
-s32 e1000_determine_phy_address(struct e1000_hw *hw);
-s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
-s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
-void e1000_power_up_phy_copper(struct e1000_hw *hw);
-void e1000_power_down_phy_copper(struct e1000_hw *hw);
-s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data);
-s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data);
-s32 e1000_copper_link_setup_82577(struct e1000_hw *hw);
-s32 e1000_check_polarity_82577(struct e1000_hw *hw);
-s32 e1000_get_phy_info_82577(struct e1000_hw *hw);
-s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw);
-s32 e1000_get_cable_length_82577(struct e1000_hw *hw);
-s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data);
-s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data);
-s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data);
-s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
- bool line_override);
-bool e1000_is_mphy_ready(struct e1000_hw *hw);
-
-#define E1000_MAX_PHY_ADDR 8
-
-/* IGP01E1000 Specific Registers */
-#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */
-#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */
-#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */
-#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */
-#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */
-#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */
-#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */
-#define IGP_PAGE_SHIFT 5
-#define PHY_REG_MASK 0x1F
-
-/* GS40G - I210 PHY defines */
-#define GS40G_PAGE_SELECT 0x16
-#define GS40G_PAGE_SHIFT 16
-#define GS40G_OFFSET_MASK 0xFFFF
-#define GS40G_PAGE_2 0x20000
-#define GS40G_MAC_REG2 0x15
-#define GS40G_MAC_LB 0x4140
-#define GS40G_MAC_SPEED_1G 0x0006
-#define GS40G_COPPER_SPEC 0x0010
-#define GS40G_CS_POWER_DOWN 0x0002
-
-#define HV_INTC_FC_PAGE_START 768
-#define I82578_ADDR_REG 29
-#define I82577_ADDR_REG 16
-#define I82577_CFG_REG 22
-#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15)
-#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift */
-#define I82577_CTRL_REG 23
-
-/* 82577 specific PHY registers */
-#define I82577_PHY_CTRL_2 18
-#define I82577_PHY_LBK_CTRL 19
-#define I82577_PHY_STATUS_2 26
-#define I82577_PHY_DIAG_STATUS 31
-
-/* I82577 PHY Status 2 */
-#define I82577_PHY_STATUS2_REV_POLARITY 0x0400
-#define I82577_PHY_STATUS2_MDIX 0x0800
-#define I82577_PHY_STATUS2_SPEED_MASK 0x0300
-#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200
-
-/* I82577 PHY Control 2 */
-#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200
-#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400
-#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600
-
-/* I82577 PHY Diagnostics Status */
-#define I82577_DSTATUS_CABLE_LENGTH 0x03FC
-#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2
-
-/* 82580 PHY Power Management */
-#define E1000_82580_PHY_POWER_MGMT 0xE14
-#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */
-#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */
-#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */
-#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */
-
-#define E1000_MPHY_DIS_ACCESS 0x80000000 /* disable_access bit */
-#define E1000_MPHY_ENA_ACCESS 0x40000000 /* enable_access bit */
-#define E1000_MPHY_BUSY 0x00010000 /* busy bit */
-#define E1000_MPHY_ADDRESS_FNC_OVERRIDE 0x20000000 /* fnc_override bit */
-#define E1000_MPHY_ADDRESS_MASK 0x0000FFFF /* address mask */
-
-#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4
-#define IGP01E1000_PHY_POLARITY_MASK 0x0078
-
-#define IGP01E1000_PSCR_AUTO_MDIX 0x1000
-#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */
-
-#define IGP01E1000_PSCFR_SMART_SPEED 0x0080
-
-#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */
-#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */
-#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */
-
-#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000
-
-#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002
-#define IGP01E1000_PSSR_MDIX 0x0800
-#define IGP01E1000_PSSR_SPEED_MASK 0xC000
-#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000
-
-#define IGP02E1000_PHY_CHANNEL_NUM 4
-#define IGP02E1000_PHY_AGC_A 0x11B1
-#define IGP02E1000_PHY_AGC_B 0x12B1
-#define IGP02E1000_PHY_AGC_C 0x14B1
-#define IGP02E1000_PHY_AGC_D 0x18B1
-
-#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Coarse=15:13, Fine=12:9 */
-#define IGP02E1000_AGC_LENGTH_MASK 0x7F
-#define IGP02E1000_AGC_RANGE 15
-
-#define E1000_CABLE_LENGTH_UNDEFINED 0xFF
-
-#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000
-#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16
-#define E1000_KMRNCTRLSTA_REN 0x00200000
-#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */
-#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */
-#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */
-#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */
-#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */
-
-#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10
-#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Ctrl */
-#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Ctrl */
-#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */
-
-/* IFE PHY Extended Status Control */
-#define IFE_PESC_POLARITY_REVERSED 0x0100
-
-/* IFE PHY Special Control */
-#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010
-#define IFE_PSC_FORCE_POLARITY 0x0020
-
-/* IFE PHY Special Control and LED Control */
-#define IFE_PSCL_PROBE_MODE 0x0020
-#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */
-#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */
-
-/* IFE PHY MDIX Control */
-#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */
-#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */
-#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto, 0=disable */
-
-/* SFP modules ID memory locations */
-#define E1000_SFF_IDENTIFIER_OFFSET 0x00
-#define E1000_SFF_IDENTIFIER_SFF 0x02
-#define E1000_SFF_IDENTIFIER_SFP 0x03
-
-#define E1000_SFF_ETH_FLAGS_OFFSET 0x06
-/* Flags for SFP modules compatible with ETH up to 1Gb */
-struct sfp_e1000_flags {
- u8 e1000_base_sx:1;
- u8 e1000_base_lx:1;
- u8 e1000_base_cx:1;
- u8 e1000_base_t:1;
- u8 e100_base_lx:1;
- u8 e100_base_fx:1;
- u8 e10_base_bx10:1;
- u8 e10_base_px:1;
-};
-
-/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
-#define E1000_SFF_VENDOR_OUI_TYCO 0x00407600
-#define E1000_SFF_VENDOR_OUI_FTL 0x00906500
-#define E1000_SFF_VENDOR_OUI_AVAGO 0x00176A00
-#define E1000_SFF_VENDOR_OUI_INTEL 0x001B2100
-
-#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
deleted file mode 100644
index f5c7e031..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
+++ /dev/null
@@ -1,631 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _E1000_REGS_H_
-#define _E1000_REGS_H_
-
-#define E1000_CTRL 0x00000 /* Device Control - RW */
-#define E1000_STATUS 0x00008 /* Device Status - RO */
-#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */
-#define E1000_EERD 0x00014 /* EEPROM Read - RW */
-#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */
-#define E1000_FLA 0x0001C /* Flash Access - RW */
-#define E1000_MDIC 0x00020 /* MDI Control - RW */
-#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */
-#define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */
-#define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */
-#define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */
-#define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */
-#define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */
-#define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */
-#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */
-#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */
-#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */
-#define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */
-#define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */
-#define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/
-#define E1000_SCTL 0x00024 /* SerDes Control - RW */
-#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */
-#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */
-#define E1000_FCT 0x00030 /* Flow Control Type - RW */
-#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */
-#define E1000_VET 0x00038 /* VLAN Ether Type - RW */
-#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */
-#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */
-#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */
-#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
-#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
-#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */
-#define E1000_RCTL 0x00100 /* Rx Control - RW */
-#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */
-#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */
-#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */
-#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */
-#define E1000_EITR(_n) (0x01680 + (0x4 * (_n)))
-#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - WO */
-#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */
-#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */
-#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */
-#define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */
-#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */
-#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */
-#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */
-#define E1000_TCTL 0x00400 /* Tx Control - RW */
-#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */
-#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */
-#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */
-#define E1000_LEDCTL 0x00E00 /* LED Control - RW */
-#define E1000_LEDMUX 0x08130 /* LED MUX Control */
-#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */
-#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */
-#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */
-#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
-#define E1000_PBS 0x01008 /* Packet Buffer Size */
-#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */
-#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */
-#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */
-#define E1000_FLOP 0x0103C /* FLASH Opcode Register */
-#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */
-#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */
-#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */
-#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */
-#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */
-#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */
-#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */
-#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */
-#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */
-#define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */
-#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */
-#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */
-#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */
-#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */
-#define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */
-#define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */
-#define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */
-#define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */
-#define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */
-#define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */
-#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */
-#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */
-#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */
-#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */
-#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */
-#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */
-#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */
-#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */
-#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */
-#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */
-#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */
-/* Split and Replication Rx Control - RW */
-#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */
-#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */
-#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */
-#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */
-#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */
-#define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */
-#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */
-#define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */
-#define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */
-#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */
-#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */
-#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */
-#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */
-#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */
-#define E1000_I210_FLMNGCTL 0x12038
-#define E1000_I210_FLMNGDATA 0x1203C
-#define E1000_I210_FLMNGCNT 0x12040
-
-#define E1000_I210_FLSWCTL 0x12048
-#define E1000_I210_FLSWDATA 0x1204C
-#define E1000_I210_FLSWCNT 0x12050
-
-#define E1000_I210_FLA 0x1201C
-
-#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n))
-#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */
-
-/* QAV Tx mode control register */
-#define E1000_I210_TQAVCTRL 0x3570
-
-/* QAV Tx mode control register bitfields masks */
-/* QAV enable */
-#define E1000_TQAVCTRL_MODE (1 << 0)
-/* Fetching arbitration type */
-#define E1000_TQAVCTRL_FETCH_ARB (1 << 4)
-/* Fetching timer enable */
-#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5)
-/* Launch arbitration type */
-#define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8)
-/* Launch timer enable */
-#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9)
-/* SP waits for SR enable */
-#define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10)
-/* Fetching timer correction */
-#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16
-#define E1000_TQAVCTRL_FETCH_TIMER_DELTA \
- (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET)
-
-/* High credit registers where _n can be 0 or 1. */
-#define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n))
-
-/* Queues fetch arbitration priority control register */
-#define E1000_I210_TQAVARBCTRL 0x3574
-/* Queues priority masks where _n and _p can be 0-3. */
-#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n))
-/* QAV Tx mode control registers where _n can be 0 or 1. */
-#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n))
-
-/* QAV Tx mode control register bitfields masks */
-#define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */
-#define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */
-#define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */
-
-/* Good transmitted packets counter registers */
-#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n)))
-
-/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */
-#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n))
-
-#define E1000_MMDAC 13 /* MMD Access Control */
-#define E1000_MMDAAD 14 /* MMD Access Address/Data */
-
-/* Convenience macros
- *
- * Note: "_n" is the queue number of the register to be written to.
- *
- * Example usage:
- * E1000_RDBAL(current_rx_queue)
- */
-#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \
- (0x0C000 + ((_n) * 0x40)))
-#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \
- (0x0C004 + ((_n) * 0x40)))
-#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \
- (0x0C008 + ((_n) * 0x40)))
-#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \
- (0x0C00C + ((_n) * 0x40)))
-#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \
- (0x0C010 + ((_n) * 0x40)))
-#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \
- (0x0C014 + ((_n) * 0x40)))
-#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n)
-#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
- (0x0C018 + ((_n) * 0x40)))
-#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \
- (0x0C028 + ((_n) * 0x40)))
-#define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \
- (0x0C030 + ((_n) * 0x40)))
-#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \
- (0x0E000 + ((_n) * 0x40)))
-#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \
- (0x0E004 + ((_n) * 0x40)))
-#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \
- (0x0E008 + ((_n) * 0x40)))
-#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \
- (0x0E010 + ((_n) * 0x40)))
-#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \
- (0x0E014 + ((_n) * 0x40)))
-#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n)
-#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \
- (0x0E018 + ((_n) * 0x40)))
-#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \
- (0x0E028 + ((_n) * 0x40)))
-#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \
- (0x0E038 + ((_n) * 0x40)))
-#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \
- (0x0E03C + ((_n) * 0x40)))
-#define E1000_TARC(_n) (0x03840 + ((_n) * 0x100))
-#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */
-#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */
-#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */
-#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4))
-#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
- (0x054E0 + ((_i - 16) * 8)))
-#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
- (0x054E4 + ((_i - 16) * 8)))
-#define E1000_SHRAL(_i) (0x05438 + ((_i) * 8))
-#define E1000_SHRAH(_i) (0x0543C + ((_i) * 8))
-#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8))
-#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4))
-#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4))
-#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8))
-#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8))
-#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8))
-#define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */
-#define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */
-#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */
-/* Same as TXPBS, renamed for newer Si - RW */
-#define E1000_ITPBS 0x03404
-#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */
-#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */
-#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */
-#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */
-#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */
-#define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */
-#define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */
-#define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */
-#define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */
-#define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */
-#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */
-#define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */
-#define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */
-/* DMA Tx Max Total Allow Size Reqs - RW */
-#define E1000_DTXMXSZRQ 0x03540
-#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */
-#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */
-#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */
-#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
-#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
-#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */
-#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */
-#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */
-#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */
-#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */
-#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */
-#define E1000_COLC 0x04028 /* Collision Count - R/clr */
-#define E1000_DC 0x04030 /* Defer Count - R/clr */
-#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */
-#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */
-#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */
-#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */
-#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */
-#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */
-#define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */
-#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */
-#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */
-#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */
-#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */
-#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */
-#define E1000_PRC511 0x04068 /* Packets Rx (256-511 bytes) - R/clr */
-#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */
-#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */
-#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */
-#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */
-#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */
-#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */
-#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */
-#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */
-#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */
-#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */
-#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */
-#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */
-#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */
-#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */
-#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */
-#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */
-#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */
-#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */
-#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */
-#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */
-#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */
-#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */
-#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */
-#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */
-#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */
-#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */
-#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */
-#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */
-#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */
-#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */
-#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */
-#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */
-#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */
-#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */
-#define E1000_IAC 0x04100 /* Interrupt Assertion Count */
-#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */
-#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */
-#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */
-#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */
-#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */
-#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */
-#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */
-#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */
-
-/* Virtualization statistical counters */
-#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n)))
-#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n)))
-#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n)))
-#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n)))
-#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n)))
-#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n)))
-#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n)))
-#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n)))
-#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n)))
-
-/* LinkSec */
-#define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */
-#define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */
-#define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */
-#define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx Octets Cnt */
-#define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */
-#define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */
-#define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */
-#define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */
-#define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */
-#define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */
-#define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */
-#define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */
-#define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */
-#define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */
-#define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */
-#define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */
-#define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */
-#define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */
-#define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */
-#define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */
-#define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */
-#define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */
-#define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */
-#define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */
-#define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */
-#define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */
-#define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */
-#define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */
-#define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */
-#define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */
-/* LinkSec Tx 128-bit Key 0 - WO */
-#define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n)))
-/* LinkSec Tx 128-bit Key 1 - WO */
-#define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n)))
-#define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */
-#define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */
-/* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit
- * key - RW.
- */
-#define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m)))
-
-#define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */
-#define E1000_IPSCTRL 0xB430 /* IpSec Control Register */
-#define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */
-#define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */
-/* IPSec Rx IPv4/v6 Address - RW */
-#define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n)))
-/* IPSec Rx 128-bit Key - RW */
-#define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n)))
-#define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */
-#define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */
-/* IPSec Tx 128-bit Key - RW */
-#define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n)))
-#define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */
-#define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */
-#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */
-#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */
-#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */
-#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */
-#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */
-#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */
-#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */
-#define E1000_RPTHC 0x04104 /* Rx Packets To Host */
-#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */
-#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */
-#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */
-#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */
-#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */
-#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */
-#define E1000_LENERRS 0x04138 /* Length Errors Count */
-#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */
-#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */
-#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */
-#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */
-#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */
-#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */
-#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */
-#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */
-#define E1000_RFCTL 0x05008 /* Receive Filter Control*/
-#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */
-#define E1000_RA 0x05400 /* Receive Address - RW Array */
-#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */
-#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */
-#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */
-#define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */
-#define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */
-#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */
-#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */
-#define E1000_WUC 0x05800 /* Wakeup Control - RW */
-#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */
-#define E1000_WUS 0x05810 /* Wakeup Status - RO */
-#define E1000_MANC 0x05820 /* Management Control - RW */
-#define E1000_IPAV 0x05838 /* IP Address Valid - RW */
-#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */
-#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */
-#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */
-#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */
-#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */
-#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */
-#define E1000_HOST_IF 0x08800 /* Host Interface */
-#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */
-#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */
-#define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */
-/* Flexible Host Filter Table */
-#define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100))
-/* Ext Flexible Host Filter Table */
-#define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100))
-
-
-#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */
-#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */
-/* Management Decision Filters */
-#define E1000_MDEF(_n) (0x05890 + (4 * (_n)))
-#define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */
-#define E1000_CCMCTL 0x05B48 /* CCM Control Register */
-#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */
-#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */
-#define E1000_GCR 0x05B00 /* PCI-Ex Control */
-#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */
-#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */
-#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */
-#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */
-#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */
-#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */
-#define E1000_SWSM 0x05B50 /* SW Semaphore */
-#define E1000_FWSM 0x05B54 /* FW Semaphore */
-/* Driver-only SW semaphore (not used by BOOT agents) */
-#define E1000_SWSM2 0x05B58
-#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */
-#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */
-#define E1000_UFUSE 0x05B78 /* UFUSE - RO */
-#define E1000_FFLT_DBG 0x05F04 /* Debug Register */
-#define E1000_HICR 0x08F00 /* Host Interface Control */
-#define E1000_FWSTS 0x08F0C /* FW Status */
-
-/* RSS registers */
-#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */
-#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */
-#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */
-#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/
-#define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */
-#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */
-#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */
-#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */
-#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */
-#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */
-/* VT Registers */
-#define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */
-#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */
-#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */
-#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */
-#define E1000_VFRE 0x00C8C /* VF Receive Enables */
-#define E1000_VFTE 0x00C90 /* VF Transmit Enables */
-#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */
-#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */
-#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */
-#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */
-#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */
-#define E1000_IOVTCL 0x05BBC /* IOV Control Register */
-#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */
-#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */
-#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */
-#define E1000_MDFB 0x03558 /* Malicious Driver free block */
-#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */
-#define E1000_TXSWC 0x05ACC /* Tx Switch Control */
-#define E1000_SCCRL 0x05DB0 /* Storm Control Control */
-#define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */
-#define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */
-/* These act per VF so an array friendly macro is used */
-#define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n)))
-#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n)))
-#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n)))
-#define E1000_VFVMBMEM(_n) (0x00800 + (_n))
-#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n)))
-/* VLAN Virtual Machine Filter - RW */
-#define E1000_VLVF(_n) (0x05D00 + (4 * (_n)))
-#define E1000_VMVIR(_n) (0x03700 + (4 * (_n)))
-#define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */
-#define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */
-#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */
-#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */
-#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */
-#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */
-#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */
-#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */
-#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */
-#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */
-#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */
-#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */
-#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */
-#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */
-#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */
-#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */
-#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */
-#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */
-#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */
-#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */
-
-/* Filtering Registers */
-#define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */
-#define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */
-#define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */
-#define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */
-#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */
-#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
-#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
-
-#define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */
-#define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */
-#define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */
-#define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */
-#define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */
-/* Tx Desc plane TC Rate-scheduler config */
-#define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4))
-/* Tx Packet plane TC Rate-Scheduler Config */
-#define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4))
-/* Rx Packet plane TC Rate-Scheduler Config */
-#define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4))
-/* Tx Desc Plane TC Rate-Scheduler Status */
-#define E1000_RTTDTCRS(_n) (0x3630 + ((_n) * 4))
-/* Tx Desc Plane TC Rate-Scheduler MMW */
-#define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4))
-/* Tx Packet plane TC Rate-Scheduler Status */
-#define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4))
-/* Tx Packet plane TC Rate-scheduler MMW */
-#define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4))
-/* Rx Packet plane TC Rate-Scheduler Status */
-#define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4))
-/* Rx Packet plane TC Rate-Scheduler MMW */
-#define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4))
-/* Tx Desc plane VM Rate-Scheduler MMW*/
-#define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4))
-/* Tx BCN Rate-Scheduler MMW */
-#define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4))
-#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */
-#define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */
-#define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */
-#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */
-#define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */
-#define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */
-#define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */
-#define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */
-#define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */
-#define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */
-#define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */
-#define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */
-#define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */
-#define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */
-
-/* DMA Coalescing registers */
-#define E1000_DMACR 0x02508 /* Control Register */
-#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */
-#define E1000_DMCTLX 0x02514 /* Time to Lx Request */
-#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */
-#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */
-#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */
-#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */
-
-/* PCIe Parity Status Register */
-#define E1000_PCIEERRSTS 0x05BA8
-
-#define E1000_PROXYS 0x5F64 /* Proxying Status */
-#define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */
-/* Thermal sensor configuration and status registers */
-#define E1000_THMJT 0x08100 /* Junction Temperature */
-#define E1000_THLOWTC 0x08104 /* Low Threshold Control */
-#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */
-#define E1000_THHIGHTC 0x0810C /* High Threshold Control */
-#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */
-
-/* Energy Efficient Ethernet "EEE" registers */
-#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */
-#define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */
-#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/
-#define E1000_EEE_SU 0x0E34 /* EEE Setup */
-#define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */
-#define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */
-
-/* OS2BMC Registers */
-#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */
-#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */
-#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */
-#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */
-
-
-
-#endif
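Most per-queue registers above (RDBAL, RDT, TDT and friends) are generated by macros that pick between the legacy block for queues 0-3 (0x100 stride) and the extended block for queue 4 and up (0x40 stride). A minimal sketch of how such a macro resolves to an MMIO write is below; hw_addr and wr32() are assumptions standing in for the driver's mapped BAR and register-write helper.

#include <stdint.h>

/* Mirrored from the removed e1000_regs.h: Rx Descriptor Tail for queue _n. */
#define E1000_RDT(_n)	((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
				    (0x0C018 + ((_n) * 0x40)))

extern volatile uint8_t *hw_addr;	/* assumed mapping of the register BAR */

/* Simple 32-bit register write helper (assumption for this sketch). */
static inline void wr32(uint32_t reg, uint32_t val)
{
	*(volatile uint32_t *)(hw_addr + reg) = val;
}

static void bump_rx_tail(unsigned int queue, uint32_t next_to_use)
{
	/* queue 1 -> 0x02918, queue 5 -> 0x0C158, per the macro above */
	wr32(E1000_RDT(queue), next_to_use);
}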
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
deleted file mode 100644
index 8aa2a308..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
+++ /dev/null
@@ -1,844 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/* Linux PRO/1000 Ethernet Driver main header file */
-
-#ifndef _IGB_H_
-#define _IGB_H_
-
-#include <linux/kobject.h>
-
-#ifndef IGB_NO_LRO
-#include <net/tcp.h>
-#endif
-
-#undef HAVE_HW_TIME_STAMP
-#ifdef HAVE_HW_TIME_STAMP
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-
-#endif
-#ifdef SIOCETHTOOL
-#include <linux/ethtool.h>
-#endif
-
-struct igb_adapter;
-
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
-//#define IGB_DCA
-#endif
-#ifdef IGB_DCA
-#include <linux/dca.h>
-#endif
-
-#include "kcompat.h"
-
-#ifdef HAVE_SCTP
-#include <linux/sctp.h>
-#endif
-
-#include "e1000_api.h"
-#include "e1000_82575.h"
-#include "e1000_manage.h"
-#include "e1000_mbx.h"
-
-#define IGB_ERR(args...) printk(KERN_ERR "igb: " args)
-
-#define PFX "igb: "
-#define DPRINTK(nlevel, klevel, fmt, args...) \
- (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
- printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
- __FUNCTION__ , ## args))
-
-#ifdef HAVE_PTP_1588_CLOCK
-#include <linux/clocksource.h>
-#include <linux/net_tstamp.h>
-#include <linux/ptp_clock_kernel.h>
-#endif /* HAVE_PTP_1588_CLOCK */
-
-#ifdef HAVE_I2C_SUPPORT
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#endif /* HAVE_I2C_SUPPORT */
-
-/* Interrupt defines */
-#define IGB_START_ITR 648 /* ~6000 ints/sec */
-#define IGB_4K_ITR 980
-#define IGB_20K_ITR 196
-#define IGB_70K_ITR 56
-
-/* Interrupt modes, as used by the IntMode parameter */
-#define IGB_INT_MODE_LEGACY 0
-#define IGB_INT_MODE_MSI 1
-#define IGB_INT_MODE_MSIX 2
-
-/* TX/RX descriptor defines */
-#define IGB_DEFAULT_TXD 256
-#define IGB_DEFAULT_TX_WORK 128
-#define IGB_MIN_TXD 80
-#define IGB_MAX_TXD 4096
-
-#define IGB_DEFAULT_RXD 256
-#define IGB_MIN_RXD 80
-#define IGB_MAX_RXD 4096
-
-#define IGB_MIN_ITR_USECS 10 /* 100k irq/sec */
-#define IGB_MAX_ITR_USECS 8191 /* 120 irq/sec */
-
-#define NON_Q_VECTORS 1
-#define MAX_Q_VECTORS 10
-
-/* Transmit and receive queues */
-#define IGB_MAX_RX_QUEUES 16
-#define IGB_MAX_TX_QUEUES 16
-
-#define IGB_MAX_VF_MC_ENTRIES 30
-#define IGB_MAX_VF_FUNCTIONS 8
-#define IGB_82576_VF_DEV_ID 0x10CA
-#define IGB_I350_VF_DEV_ID 0x1520
-#define IGB_MAX_UTA_ENTRIES 128
-#define MAX_EMULATION_MAC_ADDRS 16
-#define OUI_LEN 3
-#define IGB_MAX_VMDQ_QUEUES 8
-
-
-struct vf_data_storage {
- unsigned char vf_mac_addresses[ETH_ALEN];
- u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES];
- u16 num_vf_mc_hashes;
- u16 default_vf_vlan_id;
- u16 vlans_enabled;
- unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN];
- u32 uta_table_copy[IGB_MAX_UTA_ENTRIES];
- u32 flags;
- unsigned long last_nack;
-#ifdef IFLA_VF_MAX
- u16 pf_vlan; /* When set, guest VLAN config not allowed. */
- u16 pf_qos;
- u16 tx_rate;
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
- bool spoofchk_enabled;
-#endif
-#endif
-};
-
-#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */
-#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */
-#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */
-#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */
-
-/* RX descriptor control thresholds.
- * PTHRESH - MAC will consider prefetch if it has fewer than this number of
- * descriptors available in its onboard memory.
- * Setting this to 0 disables RX descriptor prefetch.
- * HTHRESH - MAC will only prefetch if there are at least this many descriptors
- * available in host memory.
- * If PTHRESH is 0, this should also be 0.
- * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
- * descriptors until either it has this many to write back, or the
- * ITR timer expires.
- */
-#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8)
-#define IGB_RX_HTHRESH 8
-#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8)
-#define IGB_TX_HTHRESH 1
-#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \
- adapter->msix_entries) ? 1 : 4)
-
-/* this is the size past which hardware will drop packets when setting LPE=0 */
-#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
-
-/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we
- * reserve 2 more, and skb_shared_info adds an additional 384 more,
- * this adds roughly 448 bytes of extra data meaning the smallest
- * allocation we could have is 1K.
- * i.e. RXBUFFER_512 --> size-1024 slab
- */
-/* Supported Rx Buffer Sizes */
-#define IGB_RXBUFFER_256 256
-#define IGB_RXBUFFER_2048 2048
-#define IGB_RXBUFFER_16384 16384
-#define IGB_RX_HDR_LEN IGB_RXBUFFER_256
-#if MAX_SKB_FRAGS < 8
-#define IGB_RX_BUFSZ ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024)
-#else
-#define IGB_RX_BUFSZ IGB_RXBUFFER_2048
-#endif
-
-
-/* Packet Buffer allocations */
-#define IGB_PBA_BYTES_SHIFT 0xA
-#define IGB_TX_HEAD_ADDR_SHIFT 7
-#define IGB_PBA_TX_MASK 0xFFFF0000
-
-#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */
-
-/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */
-
-#define IGB_EEPROM_APME 0x0400
-#define AUTO_ALL_MODES 0
-
-#ifndef IGB_MASTER_SLAVE
-/* Switch to override PHY master/slave setting */
-#define IGB_MASTER_SLAVE e1000_ms_hw_default
-#endif
-
-#define IGB_MNG_VLAN_NONE -1
-
-#ifndef IGB_NO_LRO
-#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/
-struct igb_lro_stats {
- u32 flushed;
- u32 coal;
-};
-
-/*
- * igb_lrohdr - header format to be aggregated by LRO
- * @iph: IP header without options
- * @tcp: TCP header
- * @ts: Optional TCP timestamp data in TCP options
- *
- * This structure relies on the check above that verifies that the header
- * is IPv4 and does not contain any options.
- */
-struct igb_lrohdr {
- struct iphdr iph;
- struct tcphdr th;
- __be32 ts[0];
-};
-
-struct igb_lro_list {
- struct sk_buff_head active;
- struct igb_lro_stats stats;
-};
-
-#endif /* IGB_NO_LRO */
-struct igb_cb {
-#ifndef IGB_NO_LRO
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- union { /* Union defining head/tail partner */
- struct sk_buff *head;
- struct sk_buff *tail;
- };
-#endif
- __be32 tsecr; /* timestamp echo response */
- u32 tsval; /* timestamp value in host order */
- u32 next_seq; /* next expected sequence number */
- u16 free; /* 65521 minus total size */
- u16 mss; /* size of data portion of packet */
- u16 append_cnt; /* number of skb's appended */
-#endif /* IGB_NO_LRO */
-#ifdef HAVE_VLAN_RX_REGISTER
- u16 vid; /* VLAN tag */
-#endif
-};
-#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb)
-
-enum igb_tx_flags {
- /* cmd_type flags */
- IGB_TX_FLAGS_VLAN = 0x01,
- IGB_TX_FLAGS_TSO = 0x02,
- IGB_TX_FLAGS_TSTAMP = 0x04,
-
- /* olinfo flags */
- IGB_TX_FLAGS_IPV4 = 0x10,
- IGB_TX_FLAGS_CSUM = 0x20,
-};
-
-/* VLAN info */
-#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
-#define IGB_TX_FLAGS_VLAN_SHIFT 16
-
-/*
- * The largest size we can write to the descriptor is 65535. In order to
- * maintain a power of two alignment we have to limit ourselves to 32K.
- */
-#define IGB_MAX_TXD_PWR 15
-#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR)
-
-/* Tx Descriptors needed, worst case */
-#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD)
-#ifndef MAX_SKB_FRAGS
-#define DESC_NEEDED 4
-#elif (MAX_SKB_FRAGS < 16)
-#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
-#else
-#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
-#endif
-
-/* wrapper around a pointer to a socket buffer,
- * so a DMA handle can be stored along with the buffer */
-struct igb_tx_buffer {
- union e1000_adv_tx_desc *next_to_watch;
- unsigned long time_stamp;
- struct sk_buff *skb;
- unsigned int bytecount;
- u16 gso_segs;
- __be16 protocol;
- DEFINE_DMA_UNMAP_ADDR(dma);
- DEFINE_DMA_UNMAP_LEN(len);
- u32 tx_flags;
-};
-
-struct igb_rx_buffer {
- dma_addr_t dma;
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- struct sk_buff *skb;
-#else
- struct page *page;
- u32 page_offset;
-#endif
-};
-
-struct igb_tx_queue_stats {
- u64 packets;
- u64 bytes;
- u64 restart_queue;
-};
-
-struct igb_rx_queue_stats {
- u64 packets;
- u64 bytes;
- u64 drops;
- u64 csum_err;
- u64 alloc_failed;
- u64 ipv4_packets; /* IPv4 headers processed */
- u64 ipv4e_packets; /* IPv4E headers with extensions processed */
- u64 ipv6_packets; /* IPv6 headers processed */
- u64 ipv6e_packets; /* IPv6E headers with extensions processed */
- u64 tcp_packets; /* TCP headers processed */
- u64 udp_packets; /* UDP headers processed */
- u64 sctp_packets; /* SCTP headers processed */
- u64 nfs_packets; /* NFS headers processed */
-};
-
-struct igb_ring_container {
- struct igb_ring *ring; /* pointer to linked list of rings */
- unsigned int total_bytes; /* total bytes processed this int */
- unsigned int total_packets; /* total packets processed this int */
- u16 work_limit; /* total work allowed per interrupt */
- u8 count; /* total number of rings in vector */
- u8 itr; /* current ITR setting for ring */
-};
-
-struct igb_ring {
- struct igb_q_vector *q_vector; /* backlink to q_vector */
- struct net_device *netdev; /* back pointer to net_device */
- struct device *dev; /* device for dma mapping */
- union { /* array of buffer info structs */
- struct igb_tx_buffer *tx_buffer_info;
- struct igb_rx_buffer *rx_buffer_info;
- };
-#ifdef HAVE_PTP_1588_CLOCK
- unsigned long last_rx_timestamp;
-#endif /* HAVE_PTP_1588_CLOCK */
- void *desc; /* descriptor ring memory */
- unsigned long flags; /* ring specific flags */
- void __iomem *tail; /* pointer to ring tail register */
- dma_addr_t dma; /* phys address of the ring */
- unsigned int size; /* length of desc. ring in bytes */
-
- u16 count; /* number of desc. in the ring */
- u8 queue_index; /* logical index of the ring*/
- u8 reg_idx; /* physical index of the ring */
-
- /* everything past this point are written often */
- u16 next_to_clean;
- u16 next_to_use;
- u16 next_to_alloc;
-
- union {
- /* TX */
- struct {
- struct igb_tx_queue_stats tx_stats;
- };
- /* RX */
- struct {
- struct igb_rx_queue_stats rx_stats;
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- u16 rx_buffer_len;
-#else
- struct sk_buff *skb;
-#endif
- };
- };
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- struct net_device *vmdq_netdev;
- int vqueue_index; /* queue index for virtual netdev */
-#endif
-} ____cacheline_internodealigned_in_smp;
-
-struct igb_q_vector {
- struct igb_adapter *adapter; /* backlink */
- int cpu; /* CPU for DCA */
- u32 eims_value; /* EIMS mask value */
-
- u16 itr_val;
- u8 set_itr;
- void __iomem *itr_register;
-
- struct igb_ring_container rx, tx;
-
- struct napi_struct napi;
-#ifndef IGB_NO_LRO
- struct igb_lro_list lrolist; /* LRO list for queue vector*/
-#endif
- char name[IFNAMSIZ + 9];
-#ifndef HAVE_NETDEV_NAPI_LIST
- struct net_device poll_dev;
-#endif
-
- /* for dynamic allocation of rings associated with this q_vector */
- struct igb_ring ring[0] ____cacheline_internodealigned_in_smp;
-};
-
-enum e1000_ring_flags_t {
-#ifndef HAVE_NDO_SET_FEATURES
- IGB_RING_FLAG_RX_CSUM,
-#endif
- IGB_RING_FLAG_RX_SCTP_CSUM,
- IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
- IGB_RING_FLAG_TX_CTX_IDX,
- IGB_RING_FLAG_TX_DETECT_HANG,
-};
-
-struct igb_mac_addr {
- u8 addr[ETH_ALEN];
- u16 queue;
- u16 state; /* bitmask */
-};
-#define IGB_MAC_STATE_DEFAULT 0x1
-#define IGB_MAC_STATE_MODIFIED 0x2
-#define IGB_MAC_STATE_IN_USE 0x4
-
-#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
-
-#define IGB_RX_DESC(R, i) \
- (&(((union e1000_adv_rx_desc *)((R)->desc))[i]))
-#define IGB_TX_DESC(R, i) \
- (&(((union e1000_adv_tx_desc *)((R)->desc))[i]))
-#define IGB_TX_CTXTDESC(R, i) \
- (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i]))
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-#define netdev_ring(ring) \
- ((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev))
-#define ring_queue_index(ring) \
- ((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index))
-#else
-#define netdev_ring(ring) (ring->netdev)
-#define ring_queue_index(ring) (ring->queue_index)
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
-
-/* igb_test_staterr - tests bits within Rx descriptor status and error fields */
-static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc,
- const u32 stat_err_bits)
-{
- return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
-}
-
-/* igb_desc_unused - calculate if we have unused descriptors */
-static inline u16 igb_desc_unused(const struct igb_ring *ring)
-{
- u16 ntc = ring->next_to_clean;
- u16 ntu = ring->next_to_use;
-
- return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
-}
-
-#ifdef CONFIG_BQL
-static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
-{
- return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
-}
-#endif /* CONFIG_BQL */
-
-// #ifdef EXT_THERMAL_SENSOR_SUPPORT
-// #ifdef IGB_PROCFS
-struct igb_therm_proc_data
-{
- struct e1000_hw *hw;
- struct e1000_thermal_diode_data *sensor_data;
-};
-
-// #endif /* IGB_PROCFS */
-// #endif /* EXT_THERMAL_SENSOR_SUPPORT */
-
-#ifdef IGB_HWMON
-#define IGB_HWMON_TYPE_LOC 0
-#define IGB_HWMON_TYPE_TEMP 1
-#define IGB_HWMON_TYPE_CAUTION 2
-#define IGB_HWMON_TYPE_MAX 3
-
-struct hwmon_attr {
- struct device_attribute dev_attr;
- struct e1000_hw *hw;
- struct e1000_thermal_diode_data *sensor;
- char name[12];
- };
-
-struct hwmon_buff {
- struct device *device;
- struct hwmon_attr *hwmon_list;
- unsigned int n_hwmon;
- };
-#endif /* IGB_HWMON */
-
-/* board specific private data structure */
-struct igb_adapter {
-#ifdef HAVE_VLAN_RX_REGISTER
- /* vlgrp must be first member of structure */
- struct vlan_group *vlgrp;
-#else
- unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-#endif
- struct net_device *netdev;
-
- unsigned long state;
- unsigned int flags;
-
- unsigned int num_q_vectors;
- struct msix_entry *msix_entries;
-
-
- /* TX */
- u16 tx_work_limit;
- u32 tx_timeout_count;
- int num_tx_queues;
- struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES];
-
- /* RX */
- int num_rx_queues;
- struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES];
-
- struct timer_list watchdog_timer;
- struct timer_list dma_err_timer;
- struct timer_list phy_info_timer;
- u16 mng_vlan_id;
- u32 bd_number;
- u32 wol;
- u32 en_mng_pt;
- u16 link_speed;
- u16 link_duplex;
- u8 port_num;
-
- /* Interrupt Throttle Rate */
- u32 rx_itr_setting;
- u32 tx_itr_setting;
-
- struct work_struct reset_task;
- struct work_struct watchdog_task;
- struct work_struct dma_err_task;
- bool fc_autoneg;
- u8 tx_timeout_factor;
-
-#ifdef DEBUG
- bool tx_hang_detected;
- bool disable_hw_reset;
-#endif
- u32 max_frame_size;
-
- /* OS defined structs */
- struct pci_dev *pdev;
-#ifndef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats net_stats;
-#endif
-#ifndef IGB_NO_LRO
- struct igb_lro_stats lro_stats;
-#endif
-
- /* structs defined in e1000_hw.h */
- struct e1000_hw hw;
- struct e1000_hw_stats stats;
- struct e1000_phy_info phy_info;
- struct e1000_phy_stats phy_stats;
-
-#ifdef ETHTOOL_TEST
- u32 test_icr;
- struct igb_ring test_tx_ring;
- struct igb_ring test_rx_ring;
-#endif
-
- int msg_enable;
-
- struct igb_q_vector *q_vector[MAX_Q_VECTORS];
- u32 eims_enable_mask;
- u32 eims_other;
-
- /* to not mess up cache alignment, always add to the bottom */
- u32 *config_space;
- u16 tx_ring_count;
- u16 rx_ring_count;
- struct vf_data_storage *vf_data;
-#ifdef IFLA_VF_MAX
- int vf_rate_link_speed;
-#endif
- u32 lli_port;
- u32 lli_size;
- unsigned int vfs_allocated_count;
- /* Malicious Driver Detection flag. Valid only when SR-IOV is enabled */
- bool mdd;
- int int_mode;
- u32 rss_queues;
- u32 vmdq_pools;
- char fw_version[43];
- u32 wvbr;
- struct igb_mac_addr *mac_table;
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES];
-#endif
- int vferr_refcount;
- int dmac;
- u32 *shadow_vfta;
-
- /* External Thermal Sensor support flag */
- bool ets;
-#ifdef IGB_HWMON
- struct hwmon_buff igb_hwmon_buff;
-#else /* IGB_HWMON */
-#ifdef IGB_PROCFS
- struct proc_dir_entry *eth_dir;
- struct proc_dir_entry *info_dir;
- struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS];
- struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS];
- bool old_lsc;
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
- u32 etrack_id;
-
-#ifdef HAVE_PTP_1588_CLOCK
- struct ptp_clock *ptp_clock;
- struct ptp_clock_info ptp_caps;
- struct delayed_work ptp_overflow_work;
- struct work_struct ptp_tx_work;
- struct sk_buff *ptp_tx_skb;
- unsigned long ptp_tx_start;
- unsigned long last_rx_ptp_check;
- spinlock_t tmreg_lock;
- struct cyclecounter cc;
- struct timecounter tc;
- u32 tx_hwtstamp_timeouts;
- u32 rx_hwtstamp_cleared;
-#endif /* HAVE_PTP_1588_CLOCK */
-
-#ifdef HAVE_I2C_SUPPORT
- struct i2c_algo_bit_data i2c_algo;
- struct i2c_adapter i2c_adap;
- struct i2c_client *i2c_client;
-#endif /* HAVE_I2C_SUPPORT */
- unsigned long link_check_timeout;
-
-
- int devrc;
-
- int copper_tries;
- u16 eee_advert;
-};
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-struct igb_vmdq_adapter {
-#ifdef HAVE_VLAN_RX_REGISTER
- /* vlgrp must be first member of structure */
- struct vlan_group *vlgrp;
-#else
- unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-#endif
- struct igb_adapter *real_adapter;
- struct net_device *vnetdev;
- struct net_device_stats net_stats;
- struct igb_ring *tx_ring;
- struct igb_ring *rx_ring;
-};
-#endif
-
-#define IGB_FLAG_HAS_MSI (1 << 0)
-#define IGB_FLAG_DCA_ENABLED (1 << 1)
-#define IGB_FLAG_LLI_PUSH (1 << 2)
-#define IGB_FLAG_QUAD_PORT_A (1 << 3)
-#define IGB_FLAG_QUEUE_PAIRS (1 << 4)
-#define IGB_FLAG_EEE (1 << 5)
-#define IGB_FLAG_DMAC (1 << 6)
-#define IGB_FLAG_DETECT_BAD_DMA (1 << 7)
-#define IGB_FLAG_PTP (1 << 8)
-#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 9)
-#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 10)
-#define IGB_FLAG_WOL_SUPPORTED (1 << 11)
-#define IGB_FLAG_NEED_LINK_UPDATE (1 << 12)
-#define IGB_FLAG_LOOPBACK_ENABLE (1 << 13)
-#define IGB_FLAG_MEDIA_RESET (1 << 14)
-#define IGB_FLAG_MAS_ENABLE (1 << 15)
-
-/* Media Auto Sense */
-#define IGB_MAS_ENABLE_0 0X0001
-#define IGB_MAS_ENABLE_1 0X0002
-#define IGB_MAS_ENABLE_2 0X0004
-#define IGB_MAS_ENABLE_3 0X0008
-
-#define IGB_MIN_TXPBSIZE 20408
-#define IGB_TX_BUF_4096 4096
-
-#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */
-
-/* DMA Coalescing defines */
-#define IGB_DMAC_DISABLE 0
-#define IGB_DMAC_MIN 250
-#define IGB_DMAC_500 500
-#define IGB_DMAC_EN_DEFAULT 1000
-#define IGB_DMAC_2000 2000
-#define IGB_DMAC_3000 3000
-#define IGB_DMAC_4000 4000
-#define IGB_DMAC_5000 5000
-#define IGB_DMAC_6000 6000
-#define IGB_DMAC_7000 7000
-#define IGB_DMAC_8000 8000
-#define IGB_DMAC_9000 9000
-#define IGB_DMAC_MAX 10000
-
-#define IGB_82576_TSYNC_SHIFT 19
-#define IGB_82580_TSYNC_SHIFT 24
-#define IGB_TS_HDR_LEN 16
-
-/* CEM Support */
-#define FW_HDR_LEN 0x4
-#define FW_CMD_DRV_INFO 0xDD
-#define FW_CMD_DRV_INFO_LEN 0x5
-#define FW_CMD_RESERVED 0X0
-#define FW_RESP_SUCCESS 0x1
-#define FW_UNUSED_VER 0x0
-#define FW_MAX_RETRIES 3
-#define FW_STATUS_SUCCESS 0x1
-#define FW_FAMILY_DRV_VER 0Xffffffff
-
-#define IGB_MAX_LINK_TRIES 20
-
-struct e1000_fw_hdr {
- u8 cmd;
- u8 buf_len;
- union
- {
- u8 cmd_resv;
- u8 ret_status;
- } cmd_or_resp;
- u8 checksum;
-};
-
-#pragma pack(push,1)
-struct e1000_fw_drv_info {
- struct e1000_fw_hdr hdr;
- u8 port_num;
- u32 drv_version;
- u16 pad; /* end spacing to ensure length is mult. of dword */
- u8 pad2; /* end spacing to ensure length is mult. of dword2 */
-};
-#pragma pack(pop)
-
-enum e1000_state_t {
- __IGB_TESTING,
- __IGB_RESETTING,
- __IGB_DOWN
-};
-
-extern char igb_driver_name[];
-extern char igb_driver_version[];
-
-extern int igb_up(struct igb_adapter *);
-extern void igb_down(struct igb_adapter *);
-extern void igb_reinit_locked(struct igb_adapter *);
-extern void igb_reset(struct igb_adapter *);
-extern int igb_set_spd_dplx(struct igb_adapter *, u16);
-extern int igb_setup_tx_resources(struct igb_ring *);
-extern int igb_setup_rx_resources(struct igb_ring *);
-extern void igb_free_tx_resources(struct igb_ring *);
-extern void igb_free_rx_resources(struct igb_ring *);
-extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
-extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
-extern void igb_setup_tctl(struct igb_adapter *);
-extern void igb_setup_rctl(struct igb_adapter *);
-extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
-extern void igb_unmap_and_free_tx_resource(struct igb_ring *,
- struct igb_tx_buffer *);
-extern void igb_alloc_rx_buffers(struct igb_ring *, u16);
-extern void igb_clean_rx_ring(struct igb_ring *);
-extern void igb_update_stats(struct igb_adapter *);
-extern bool igb_has_link(struct igb_adapter *adapter);
-extern void igb_set_ethtool_ops(struct net_device *);
-extern void igb_check_options(struct igb_adapter *);
-extern void igb_power_up_link(struct igb_adapter *);
-#ifdef HAVE_PTP_1588_CLOCK
-extern void igb_ptp_init(struct igb_adapter *adapter);
-extern void igb_ptp_stop(struct igb_adapter *adapter);
-extern void igb_ptp_reset(struct igb_adapter *adapter);
-extern void igb_ptp_tx_work(struct work_struct *work);
-extern void igb_ptp_rx_hang(struct igb_adapter *adapter);
-extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
-extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
- struct sk_buff *skb);
-extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
- unsigned char *va,
- struct sk_buff *skb);
-static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
- skb_pull(skb, IGB_TS_HDR_LEN);
-#endif
- return;
- }
-
- if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS))
- igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
-
- /* Update last_rx_timestamp so the watchdog can catch the error case
- * of a timestamp latched for a packet that was subsequently dropped.
- */
- rx_ring->last_rx_timestamp = jiffies;
-}
-
-extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
- struct ifreq *ifr, int cmd);
-#endif /* HAVE_PTP_1588_CLOCK */
-#ifdef ETHTOOL_OPS_COMPAT
-extern int ethtool_ioctl(struct ifreq *);
-#endif
-extern int igb_write_mc_addr_list(struct net_device *netdev);
-extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue);
-extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue);
-extern int igb_available_rars(struct igb_adapter *adapter);
-extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32);
-extern void igb_configure_vt_default_pool(struct igb_adapter *adapter);
-extern void igb_enable_vlan_tags(struct igb_adapter *adapter);
-#ifndef HAVE_VLAN_RX_REGISTER
-extern void igb_vlan_mode(struct net_device *, u32);
-#endif
-
-#define E1000_PCS_CFG_IGN_SD 1
-
-#ifdef IGB_HWMON
-void igb_sysfs_exit(struct igb_adapter *adapter);
-int igb_sysfs_init(struct igb_adapter *adapter);
-#else
-#ifdef IGB_PROCFS
-int igb_procfs_init(struct igb_adapter* adapter);
-void igb_procfs_exit(struct igb_adapter* adapter);
-int igb_procfs_topdir_init(void);
-void igb_procfs_topdir_exit(void);
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
-
-
-
-#endif /* _IGB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
deleted file mode 100644
index 064528bc..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ /dev/null
@@ -1,2842 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/* ethtool support for igb */
-
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-
-#ifdef SIOCETHTOOL
-#include <linux/ethtool.h>
-#ifdef CONFIG_PM_RUNTIME
-#include <linux/pm_runtime.h>
-#endif /* CONFIG_PM_RUNTIME */
-#include <linux/highmem.h>
-
-#include "igb.h"
-#include "igb_regtest.h"
-#include <linux/if_vlan.h>
-#ifdef ETHTOOL_GEEE
-#include <linux/mdio.h>
-#endif
-
-#ifdef ETHTOOL_OPS_COMPAT
-#include "kcompat_ethtool.c"
-#endif
-#ifdef ETHTOOL_GSTATS
-struct igb_stats {
- char stat_string[ETH_GSTRING_LEN];
- int sizeof_stat;
- int stat_offset;
-};
-
-#define IGB_STAT(_name, _stat) { \
- .stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \
- .stat_offset = offsetof(struct igb_adapter, _stat) \
-}
-static const struct igb_stats igb_gstrings_stats[] = {
- IGB_STAT("rx_packets", stats.gprc),
- IGB_STAT("tx_packets", stats.gptc),
- IGB_STAT("rx_bytes", stats.gorc),
- IGB_STAT("tx_bytes", stats.gotc),
- IGB_STAT("rx_broadcast", stats.bprc),
- IGB_STAT("tx_broadcast", stats.bptc),
- IGB_STAT("rx_multicast", stats.mprc),
- IGB_STAT("tx_multicast", stats.mptc),
- IGB_STAT("multicast", stats.mprc),
- IGB_STAT("collisions", stats.colc),
- IGB_STAT("rx_crc_errors", stats.crcerrs),
- IGB_STAT("rx_no_buffer_count", stats.rnbc),
- IGB_STAT("rx_missed_errors", stats.mpc),
- IGB_STAT("tx_aborted_errors", stats.ecol),
- IGB_STAT("tx_carrier_errors", stats.tncrs),
- IGB_STAT("tx_window_errors", stats.latecol),
- IGB_STAT("tx_abort_late_coll", stats.latecol),
- IGB_STAT("tx_deferred_ok", stats.dc),
- IGB_STAT("tx_single_coll_ok", stats.scc),
- IGB_STAT("tx_multi_coll_ok", stats.mcc),
- IGB_STAT("tx_timeout_count", tx_timeout_count),
- IGB_STAT("rx_long_length_errors", stats.roc),
- IGB_STAT("rx_short_length_errors", stats.ruc),
- IGB_STAT("rx_align_errors", stats.algnerrc),
- IGB_STAT("tx_tcp_seg_good", stats.tsctc),
- IGB_STAT("tx_tcp_seg_failed", stats.tsctfc),
- IGB_STAT("rx_flow_control_xon", stats.xonrxc),
- IGB_STAT("rx_flow_control_xoff", stats.xoffrxc),
- IGB_STAT("tx_flow_control_xon", stats.xontxc),
- IGB_STAT("tx_flow_control_xoff", stats.xofftxc),
- IGB_STAT("rx_long_byte_count", stats.gorc),
- IGB_STAT("tx_dma_out_of_sync", stats.doosync),
-#ifndef IGB_NO_LRO
- IGB_STAT("lro_aggregated", lro_stats.coal),
- IGB_STAT("lro_flushed", lro_stats.flushed),
-#endif /* IGB_NO_LRO */
- IGB_STAT("tx_smbus", stats.mgptc),
- IGB_STAT("rx_smbus", stats.mgprc),
- IGB_STAT("dropped_smbus", stats.mgpdc),
- IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
- IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
- IGB_STAT("os2bmc_tx_by_host", stats.o2bspc),
- IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc),
-#ifdef HAVE_PTP_1588_CLOCK
- IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
- IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
-#endif /* HAVE_PTP_1588_CLOCK */
-};
-
-#define IGB_NETDEV_STAT(_net_stat) { \
- .stat_string = #_net_stat, \
- .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
- .stat_offset = offsetof(struct net_device_stats, _net_stat) \
-}
-static const struct igb_stats igb_gstrings_net_stats[] = {
- IGB_NETDEV_STAT(rx_errors),
- IGB_NETDEV_STAT(tx_errors),
- IGB_NETDEV_STAT(tx_dropped),
- IGB_NETDEV_STAT(rx_length_errors),
- IGB_NETDEV_STAT(rx_over_errors),
- IGB_NETDEV_STAT(rx_frame_errors),
- IGB_NETDEV_STAT(rx_fifo_errors),
- IGB_NETDEV_STAT(tx_fifo_errors),
- IGB_NETDEV_STAT(tx_heartbeat_errors)
-};
-
-#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats)
-#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats)
-#define IGB_RX_QUEUE_STATS_LEN \
- (sizeof(struct igb_rx_queue_stats) / sizeof(u64))
-#define IGB_TX_QUEUE_STATS_LEN \
- (sizeof(struct igb_tx_queue_stats) / sizeof(u64))
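-/* Note: IGB_QUEUE_STATS_LEN (and therefore IGB_STATS_LEN) expands to an
- * expression that references a local variable named 'netdev', so these
- * macros can only be used where such a variable is in scope.
- */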
-#define IGB_QUEUE_STATS_LEN \
- ((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \
- IGB_RX_QUEUE_STATS_LEN) + \
- (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \
- IGB_TX_QUEUE_STATS_LEN))
-#define IGB_STATS_LEN \
- (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
-
-#endif /* ETHTOOL_GSTATS */
-#ifdef ETHTOOL_TEST
-static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
- "Register test (offline)", "Eeprom test (offline)",
- "Interrupt test (offline)", "Loopback test (offline)",
- "Link test (on/offline)"
-};
-#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
-#endif /* ETHTOOL_TEST */
-
-static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 status;
-
- if (hw->phy.media_type == e1000_media_type_copper) {
-
- ecmd->supported = (SUPPORTED_10baseT_Half |
- SUPPORTED_10baseT_Full |
- SUPPORTED_100baseT_Half |
- SUPPORTED_100baseT_Full |
- SUPPORTED_1000baseT_Full|
- SUPPORTED_Autoneg |
- SUPPORTED_TP |
- SUPPORTED_Pause);
- ecmd->advertising = ADVERTISED_TP;
-
- if (hw->mac.autoneg == 1) {
- ecmd->advertising |= ADVERTISED_Autoneg;
- /* the e1000 autoneg advertisement bits line up with the ethtool
- * ADVERTISED_* flags, so the mask can be OR'ed in directly */
- ecmd->advertising |= hw->phy.autoneg_advertised;
- }
-
- ecmd->port = PORT_TP;
- ecmd->phy_address = hw->phy.addr;
- ecmd->transceiver = XCVR_INTERNAL;
-
- } else {
- ecmd->supported = (SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full |
- SUPPORTED_FIBRE |
- SUPPORTED_Autoneg |
- SUPPORTED_Pause);
- if (hw->mac.type == e1000_i354)
- ecmd->supported |= (SUPPORTED_2500baseX_Full);
-
- ecmd->advertising = ADVERTISED_FIBRE;
-
- switch (adapter->link_speed) {
- case SPEED_2500:
- ecmd->advertising = ADVERTISED_2500baseX_Full;
- break;
- case SPEED_1000:
- ecmd->advertising = ADVERTISED_1000baseT_Full;
- break;
- case SPEED_100:
- ecmd->advertising = ADVERTISED_100baseT_Full;
- break;
- default:
- break;
- }
-
- if (hw->mac.autoneg == 1)
- ecmd->advertising |= ADVERTISED_Autoneg;
-
- ecmd->port = PORT_FIBRE;
- ecmd->transceiver = XCVR_EXTERNAL;
- }
-
- if (hw->mac.autoneg != 1)
- ecmd->advertising &= ~(ADVERTISED_Pause |
- ADVERTISED_Asym_Pause);
-
- if (hw->fc.requested_mode == e1000_fc_full)
- ecmd->advertising |= ADVERTISED_Pause;
- else if (hw->fc.requested_mode == e1000_fc_rx_pause)
- ecmd->advertising |= (ADVERTISED_Pause |
- ADVERTISED_Asym_Pause);
- else if (hw->fc.requested_mode == e1000_fc_tx_pause)
- ecmd->advertising |= ADVERTISED_Asym_Pause;
- else
- ecmd->advertising &= ~(ADVERTISED_Pause |
- ADVERTISED_Asym_Pause);
-
- status = E1000_READ_REG(hw, E1000_STATUS);
-
- if (status & E1000_STATUS_LU) {
- if ((hw->mac.type == e1000_i354) &&
- (status & E1000_STATUS_2P5_SKU) &&
- !(status & E1000_STATUS_2P5_SKU_OVER))
- ecmd->speed = SPEED_2500;
- else if (status & E1000_STATUS_SPEED_1000)
- ecmd->speed = SPEED_1000;
- else if (status & E1000_STATUS_SPEED_100)
- ecmd->speed = SPEED_100;
- else
- ecmd->speed = SPEED_10;
-
- if ((status & E1000_STATUS_FD) ||
- hw->phy.media_type != e1000_media_type_copper)
- ecmd->duplex = DUPLEX_FULL;
- else
- ecmd->duplex = DUPLEX_HALF;
-
- } else {
- ecmd->speed = -1;
- ecmd->duplex = -1;
- }
-
- if ((hw->phy.media_type == e1000_media_type_fiber) ||
- hw->mac.autoneg)
- ecmd->autoneg = AUTONEG_ENABLE;
- else
- ecmd->autoneg = AUTONEG_DISABLE;
-#ifdef ETH_TP_MDI_X
-
- /* MDI-X => 2; MDI => 1; Invalid => 0 */
- if (hw->phy.media_type == e1000_media_type_copper)
- ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
- ETH_TP_MDI;
- else
- ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
-
-#ifdef ETH_TP_MDI_AUTO
- if (hw->phy.mdix == AUTO_ALL_MODES)
- ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
- else
- ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
-
-#endif
-#endif /* ETH_TP_MDI_X */
- return 0;
-}
-
-static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-
- if (ecmd->duplex == DUPLEX_HALF) {
- if (!hw->dev_spec._82575.eee_disable)
- dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n");
- hw->dev_spec._82575.eee_disable = true;
- } else {
- if (hw->dev_spec._82575.eee_disable)
- dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n");
- hw->dev_spec._82575.eee_disable = false;
- }
-
- /* When SoL/IDER sessions are active, autoneg/speed/duplex
- * cannot be changed */
- if (e1000_check_reset_block(hw)) {
- dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link "
- "characteristics when SoL/IDER is active.\n");
- return -EINVAL;
- }
-
-#ifdef ETH_TP_MDI_AUTO
- /*
- * MDI setting is only allowed when autoneg enabled because
- * some hardware doesn't allow MDI setting when speed or
- * duplex is forced.
- */
- if (ecmd->eth_tp_mdix_ctrl) {
- if (hw->phy.media_type != e1000_media_type_copper)
- return -EOPNOTSUPP;
-
- if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
- (ecmd->autoneg != AUTONEG_ENABLE)) {
- dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
- return -EINVAL;
- }
- }
-
-#endif /* ETH_TP_MDI_AUTO */
- while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
-
- if (ecmd->autoneg == AUTONEG_ENABLE) {
- hw->mac.autoneg = 1;
- if (hw->phy.media_type == e1000_media_type_fiber) {
- hw->phy.autoneg_advertised = ecmd->advertising |
- ADVERTISED_FIBRE |
- ADVERTISED_Autoneg;
- switch (adapter->link_speed) {
- case SPEED_2500:
- hw->phy.autoneg_advertised =
- ADVERTISED_2500baseX_Full;
- break;
- case SPEED_1000:
- hw->phy.autoneg_advertised =
- ADVERTISED_1000baseT_Full;
- break;
- case SPEED_100:
- hw->phy.autoneg_advertised =
- ADVERTISED_100baseT_Full;
- break;
- default:
- break;
- }
- } else {
- hw->phy.autoneg_advertised = ecmd->advertising |
- ADVERTISED_TP |
- ADVERTISED_Autoneg;
- }
- ecmd->advertising = hw->phy.autoneg_advertised;
- if (adapter->fc_autoneg)
- hw->fc.requested_mode = e1000_fc_default;
- } else {
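- /* ecmd->speed + ecmd->duplex packs both settings into the single
- * u16 argument expected by igb_set_spd_dplx()
- */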
- if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) {
- clear_bit(__IGB_RESETTING, &adapter->state);
- return -EINVAL;
- }
- }
-
-#ifdef ETH_TP_MDI_AUTO
- /* MDI-X => 2; MDI => 1; Auto => 3 */
- if (ecmd->eth_tp_mdix_ctrl) {
- /* fix up the value for auto (3 => 0) as zero is mapped
- * internally to auto
- */
- if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
- hw->phy.mdix = AUTO_ALL_MODES;
- else
- hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
- }
-
-#endif /* ETH_TP_MDI_AUTO */
- /* reset the link */
- if (netif_running(adapter->netdev)) {
- igb_down(adapter);
- igb_up(adapter);
- } else
- igb_reset(adapter);
-
- clear_bit(__IGB_RESETTING, &adapter->state);
- return 0;
-}
-
-static u32 igb_get_link(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_mac_info *mac = &adapter->hw.mac;
-
- /*
- * If the link is not reported up to netdev, interrupts are disabled,
- * and so the physical link state may have changed since we last
- * looked. Set get_link_status to make sure that the true link
- * state is interrogated, rather than pulling a cached and possibly
- * stale link state from the driver.
- */
- if (!netif_carrier_ok(netdev))
- mac->get_link_status = 1;
-
- return igb_has_link(adapter);
-}
-
-static void igb_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pause)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-
- pause->autoneg =
- (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
-
- if (hw->fc.current_mode == e1000_fc_rx_pause)
- pause->rx_pause = 1;
- else if (hw->fc.current_mode == e1000_fc_tx_pause)
- pause->tx_pause = 1;
- else if (hw->fc.current_mode == e1000_fc_full) {
- pause->rx_pause = 1;
- pause->tx_pause = 1;
- }
-}
-
-static int igb_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pause)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- int retval = 0;
-
- adapter->fc_autoneg = pause->autoneg;
-
- while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
-
- if (adapter->fc_autoneg == AUTONEG_ENABLE) {
- hw->fc.requested_mode = e1000_fc_default;
- if (netif_running(adapter->netdev)) {
- igb_down(adapter);
- igb_up(adapter);
- } else {
- igb_reset(adapter);
- }
- } else {
- if (pause->rx_pause && pause->tx_pause)
- hw->fc.requested_mode = e1000_fc_full;
- else if (pause->rx_pause && !pause->tx_pause)
- hw->fc.requested_mode = e1000_fc_rx_pause;
- else if (!pause->rx_pause && pause->tx_pause)
- hw->fc.requested_mode = e1000_fc_tx_pause;
- else if (!pause->rx_pause && !pause->tx_pause)
- hw->fc.requested_mode = e1000_fc_none;
-
- hw->fc.current_mode = hw->fc.requested_mode;
-
- if (hw->phy.media_type == e1000_media_type_fiber) {
- retval = hw->mac.ops.setup_link(hw);
- /* implicit goto out */
- } else {
- retval = e1000_force_mac_fc(hw);
- if (retval)
- goto out;
- e1000_set_fc_watermarks_generic(hw);
- }
- }
-
-out:
- clear_bit(__IGB_RESETTING, &adapter->state);
- return retval;
-}
-
-static u32 igb_get_msglevel(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- return adapter->msg_enable;
-}
-
-static void igb_set_msglevel(struct net_device *netdev, u32 data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- adapter->msg_enable = data;
-}
-
-static int igb_get_regs_len(struct net_device *netdev)
-{
-#define IGB_REGS_LEN 555
- return IGB_REGS_LEN * sizeof(u32);
-}
-
-static void igb_get_regs(struct net_device *netdev,
- struct ethtool_regs *regs, void *p)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 *regs_buff = p;
- u8 i;
-
- memset(p, 0, IGB_REGS_LEN * sizeof(u32));
-
- regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
-
- /* General Registers */
- regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL);
- regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS);
- regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT);
- regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC);
- regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL);
- regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW);
- regs_buff[6] = E1000_READ_REG(hw, E1000_VET);
- regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL);
- regs_buff[8] = E1000_READ_REG(hw, E1000_PBA);
- regs_buff[9] = E1000_READ_REG(hw, E1000_PBS);
- regs_buff[10] = E1000_READ_REG(hw, E1000_FRTIMER);
- regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER);
-
- /* NVM Register */
- regs_buff[12] = E1000_READ_REG(hw, E1000_EECD);
-
- /* Interrupt */
- /* Reading EICS for EICR because they read the
- * same but EICS does not clear on read */
- regs_buff[13] = E1000_READ_REG(hw, E1000_EICS);
- regs_buff[14] = E1000_READ_REG(hw, E1000_EICS);
- regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS);
- regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC);
- regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC);
- regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM);
- /* Reading ICS for ICR because they read the
- * same but ICS does not clear on read */
- regs_buff[19] = E1000_READ_REG(hw, E1000_ICS);
- regs_buff[20] = E1000_READ_REG(hw, E1000_ICS);
- regs_buff[21] = E1000_READ_REG(hw, E1000_IMS);
- regs_buff[22] = E1000_READ_REG(hw, E1000_IMC);
- regs_buff[23] = E1000_READ_REG(hw, E1000_IAC);
- regs_buff[24] = E1000_READ_REG(hw, E1000_IAM);
- regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP);
-
- /* Flow Control */
- regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL);
- regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH);
- regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV);
- regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL);
- regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH);
- regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV);
-
- /* Receive */
- regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL);
- regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM);
- regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML);
- regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL);
- regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC);
- regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL);
-
- /* Transmit */
- regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL);
- regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT);
- regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG);
- regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL);
-
- /* Wake Up */
- regs_buff[42] = E1000_READ_REG(hw, E1000_WUC);
- regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC);
- regs_buff[44] = E1000_READ_REG(hw, E1000_WUS);
- regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV);
- regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL);
-
- /* MAC */
- regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0);
- regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL);
- regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT);
- regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV);
- regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB);
- regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX);
- regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP);
-
- /* Statistics */
- regs_buff[54] = adapter->stats.crcerrs;
- regs_buff[55] = adapter->stats.algnerrc;
- regs_buff[56] = adapter->stats.symerrs;
- regs_buff[57] = adapter->stats.rxerrc;
- regs_buff[58] = adapter->stats.mpc;
- regs_buff[59] = adapter->stats.scc;
- regs_buff[60] = adapter->stats.ecol;
- regs_buff[61] = adapter->stats.mcc;
- regs_buff[62] = adapter->stats.latecol;
- regs_buff[63] = adapter->stats.colc;
- regs_buff[64] = adapter->stats.dc;
- regs_buff[65] = adapter->stats.tncrs;
- regs_buff[66] = adapter->stats.sec;
- regs_buff[67] = adapter->stats.htdpmc;
- regs_buff[68] = adapter->stats.rlec;
- regs_buff[69] = adapter->stats.xonrxc;
- regs_buff[70] = adapter->stats.xontxc;
- regs_buff[71] = adapter->stats.xoffrxc;
- regs_buff[72] = adapter->stats.xofftxc;
- regs_buff[73] = adapter->stats.fcruc;
- regs_buff[74] = adapter->stats.prc64;
- regs_buff[75] = adapter->stats.prc127;
- regs_buff[76] = adapter->stats.prc255;
- regs_buff[77] = adapter->stats.prc511;
- regs_buff[78] = adapter->stats.prc1023;
- regs_buff[79] = adapter->stats.prc1522;
- regs_buff[80] = adapter->stats.gprc;
- regs_buff[81] = adapter->stats.bprc;
- regs_buff[82] = adapter->stats.mprc;
- regs_buff[83] = adapter->stats.gptc;
- regs_buff[84] = adapter->stats.gorc;
- regs_buff[86] = adapter->stats.gotc;
- regs_buff[88] = adapter->stats.rnbc;
- regs_buff[89] = adapter->stats.ruc;
- regs_buff[90] = adapter->stats.rfc;
- regs_buff[91] = adapter->stats.roc;
- regs_buff[92] = adapter->stats.rjc;
- regs_buff[93] = adapter->stats.mgprc;
- regs_buff[94] = adapter->stats.mgpdc;
- regs_buff[95] = adapter->stats.mgptc;
- regs_buff[96] = adapter->stats.tor;
- regs_buff[98] = adapter->stats.tot;
- regs_buff[100] = adapter->stats.tpr;
- regs_buff[101] = adapter->stats.tpt;
- regs_buff[102] = adapter->stats.ptc64;
- regs_buff[103] = adapter->stats.ptc127;
- regs_buff[104] = adapter->stats.ptc255;
- regs_buff[105] = adapter->stats.ptc511;
- regs_buff[106] = adapter->stats.ptc1023;
- regs_buff[107] = adapter->stats.ptc1522;
- regs_buff[108] = adapter->stats.mptc;
- regs_buff[109] = adapter->stats.bptc;
- regs_buff[110] = adapter->stats.tsctc;
- regs_buff[111] = adapter->stats.iac;
- regs_buff[112] = adapter->stats.rpthc;
- regs_buff[113] = adapter->stats.hgptc;
- regs_buff[114] = adapter->stats.hgorc;
- regs_buff[116] = adapter->stats.hgotc;
- regs_buff[118] = adapter->stats.lenerrs;
- regs_buff[119] = adapter->stats.scvpc;
- regs_buff[120] = adapter->stats.hrmpc;
-
- for (i = 0; i < 4; i++)
- regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i));
- for (i = 0; i < 4; i++)
- regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i));
- for (i = 0; i < 4; i++)
- regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i));
- for (i = 0; i < 4; i++)
- regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i));
- for (i = 0; i < 4; i++)
- regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i));
- for (i = 0; i < 4; i++)
- regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i));
- for (i = 0; i < 4; i++)
- regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i));
- for (i = 0; i < 4; i++)
- regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
-
- for (i = 0; i < 10; i++)
- regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i));
- for (i = 0; i < 8; i++)
- regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i));
- for (i = 0; i < 8; i++)
- regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i));
- for (i = 0; i < 16; i++)
- regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i));
- for (i = 0; i < 16; i++)
- regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i));
-
- for (i = 0; i < 4; i++)
- regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i));
- for (i = 0; i < 4; i++)
- regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i));
- for (i = 0; i < 4; i++)
- regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i));
- for (i = 0; i < 4; i++)
- regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i));
- for (i = 0; i < 4; i++)
- regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i));
- for (i = 0; i < 4; i++)
- regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i));
- for (i = 0; i < 4; i++)
- regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i));
- for (i = 0; i < 4; i++)
- regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i));
- for (i = 0; i < 4; i++)
- regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i));
-
- for (i = 0; i < 4; i++)
- regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i));
- for (i = 0; i < 4; i++)
- regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i));
- for (i = 0; i < 32; i++)
- regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i));
- for (i = 0; i < 128; i++)
- regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i));
- for (i = 0; i < 128; i++)
- regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i));
- for (i = 0; i < 4; i++)
- regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i));
-
- regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH);
- regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT);
- regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS);
- regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC);
- if (hw->mac.type > e1000_82580) {
- regs_buff[551] = adapter->stats.o2bgptc;
- regs_buff[552] = adapter->stats.b2ospc;
- regs_buff[553] = adapter->stats.o2bspc;
- regs_buff[554] = adapter->stats.b2ogprc;
- }
-}
-
-static int igb_get_eeprom_len(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- return adapter->hw.nvm.word_size * 2;
-}
-
-static int igb_get_eeprom(struct net_device *netdev,
- struct ethtool_eeprom *eeprom, u8 *bytes)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u16 *eeprom_buff;
- int first_word, last_word;
- int ret_val = 0;
- u16 i;
-
- if (eeprom->len == 0)
- return -EINVAL;
-
- eeprom->magic = hw->vendor_id | (hw->device_id << 16);
-
- first_word = eeprom->offset >> 1;
- last_word = (eeprom->offset + eeprom->len - 1) >> 1;
-
- eeprom_buff = kmalloc(sizeof(u16) *
- (last_word - first_word + 1), GFP_KERNEL);
- if (!eeprom_buff)
- return -ENOMEM;
-
- if (hw->nvm.type == e1000_nvm_eeprom_spi)
- ret_val = e1000_read_nvm(hw, first_word,
- last_word - first_word + 1,
- eeprom_buff);
- else {
- for (i = 0; i < last_word - first_word + 1; i++) {
- ret_val = e1000_read_nvm(hw, first_word + i, 1,
- &eeprom_buff[i]);
- if (ret_val)
- break;
- }
- }
-
- /* Device's eeprom is always little-endian, word addressable */
- for (i = 0; i < last_word - first_word + 1; i++)
- eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]);
-
- memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
- eeprom->len);
- kfree(eeprom_buff);
-
- return ret_val;
-}
-
-static int igb_set_eeprom(struct net_device *netdev,
- struct ethtool_eeprom *eeprom, u8 *bytes)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u16 *eeprom_buff;
- void *ptr;
- int max_len, first_word, last_word, ret_val = 0;
- u16 i;
-
- if (eeprom->len == 0)
- return -EOPNOTSUPP;
-
- if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
- return -EFAULT;
-
- max_len = hw->nvm.word_size * 2;
-
- first_word = eeprom->offset >> 1;
- last_word = (eeprom->offset + eeprom->len - 1) >> 1;
- eeprom_buff = kmalloc(max_len, GFP_KERNEL);
- if (!eeprom_buff)
- return -ENOMEM;
-
- ptr = (void *)eeprom_buff;
-
- if (eeprom->offset & 1) {
- /* need read/modify/write of first changed EEPROM word */
- /* only the second byte of the word is being modified */
- ret_val = e1000_read_nvm(hw, first_word, 1,
- &eeprom_buff[0]);
- ptr++;
- }
- if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
- /* need read/modify/write of last changed EEPROM word */
- /* only the first byte of the word is being modified */
- ret_val = e1000_read_nvm(hw, last_word, 1,
- &eeprom_buff[last_word - first_word]);
- }
-
- /* Device's eeprom is always little-endian, word addressable */
- for (i = 0; i < last_word - first_word + 1; i++)
- le16_to_cpus(&eeprom_buff[i]);
-
- memcpy(ptr, bytes, eeprom->len);
-
- for (i = 0; i < last_word - first_word + 1; i++)
- cpu_to_le16s(&eeprom_buff[i]);
-
- ret_val = e1000_write_nvm(hw, first_word,
- last_word - first_word + 1, eeprom_buff);
-
- /* Update the checksum if the write succeeded,
- * and flush shadow RAM for 82573 controllers */
- if (ret_val == 0)
- e1000_update_nvm_checksum(hw);
-
- kfree(eeprom_buff);
- return ret_val;
-}
-
-static void igb_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1);
- strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1);
-
- strncpy(drvinfo->fw_version, adapter->fw_version,
- sizeof(drvinfo->fw_version) - 1);
- strncpy(drvinfo->bus_info, pci_name(adapter->pdev),
- sizeof(drvinfo->bus_info) - 1);
- drvinfo->n_stats = IGB_STATS_LEN;
- drvinfo->testinfo_len = IGB_TEST_LEN;
- drvinfo->regdump_len = igb_get_regs_len(netdev);
- drvinfo->eedump_len = igb_get_eeprom_len(netdev);
-}
-
-static void igb_get_ringparam(struct net_device *netdev,
- struct ethtool_ringparam *ring)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- ring->rx_max_pending = IGB_MAX_RXD;
- ring->tx_max_pending = IGB_MAX_TXD;
- ring->rx_mini_max_pending = 0;
- ring->rx_jumbo_max_pending = 0;
- ring->rx_pending = adapter->rx_ring_count;
- ring->tx_pending = adapter->tx_ring_count;
- ring->rx_mini_pending = 0;
- ring->rx_jumbo_pending = 0;
-}
-
-static int igb_set_ringparam(struct net_device *netdev,
- struct ethtool_ringparam *ring)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct igb_ring *temp_ring;
- int i, err = 0;
- u16 new_rx_count, new_tx_count;
-
- if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
- return -EINVAL;
-
- new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD);
- new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD);
- new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
-
- new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD);
- new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD);
- new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
-
- if ((new_tx_count == adapter->tx_ring_count) &&
- (new_rx_count == adapter->rx_ring_count)) {
- /* nothing to do */
- return 0;
- }
-
- while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
-
- if (!netif_running(adapter->netdev)) {
- for (i = 0; i < adapter->num_tx_queues; i++)
- adapter->tx_ring[i]->count = new_tx_count;
- for (i = 0; i < adapter->num_rx_queues; i++)
- adapter->rx_ring[i]->count = new_rx_count;
- adapter->tx_ring_count = new_tx_count;
- adapter->rx_ring_count = new_rx_count;
- goto clear_reset;
- }
-
- if (adapter->num_tx_queues > adapter->num_rx_queues)
- temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring));
- else
- temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring));
-
- if (!temp_ring) {
- err = -ENOMEM;
- goto clear_reset;
- }
-
- igb_down(adapter);
-
- /*
- * We can't just free everything and then setup again,
- * because the ISRs in MSI-X mode get passed pointers
- * to the tx and rx ring structs.
- */
- if (new_tx_count != adapter->tx_ring_count) {
- for (i = 0; i < adapter->num_tx_queues; i++) {
- memcpy(&temp_ring[i], adapter->tx_ring[i],
- sizeof(struct igb_ring));
-
- temp_ring[i].count = new_tx_count;
- err = igb_setup_tx_resources(&temp_ring[i]);
- if (err) {
- while (i) {
- i--;
- igb_free_tx_resources(&temp_ring[i]);
- }
- goto err_setup;
- }
- }
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- igb_free_tx_resources(adapter->tx_ring[i]);
-
- memcpy(adapter->tx_ring[i], &temp_ring[i],
- sizeof(struct igb_ring));
- }
-
- adapter->tx_ring_count = new_tx_count;
- }
-
- if (new_rx_count != adapter->rx_ring_count) {
- for (i = 0; i < adapter->num_rx_queues; i++) {
- memcpy(&temp_ring[i], adapter->rx_ring[i],
- sizeof(struct igb_ring));
-
- temp_ring[i].count = new_rx_count;
- err = igb_setup_rx_resources(&temp_ring[i]);
- if (err) {
- while (i) {
- i--;
- igb_free_rx_resources(&temp_ring[i]);
- }
- goto err_setup;
- }
-
- }
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- igb_free_rx_resources(adapter->rx_ring[i]);
-
- memcpy(adapter->rx_ring[i], &temp_ring[i],
- sizeof(struct igb_ring));
- }
-
- adapter->rx_ring_count = new_rx_count;
- }
-err_setup:
- igb_up(adapter);
- vfree(temp_ring);
-clear_reset:
- clear_bit(__IGB_RESETTING, &adapter->state);
- return err;
-}
-
-static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data,
- int reg, u32 mask, u32 write)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 pat, val;
- static const u32 _test[] =
- {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
- for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {
- E1000_WRITE_REG(hw, reg, (_test[pat] & write));
- val = E1000_READ_REG(hw, reg) & mask;
- if (val != (_test[pat] & write & mask)) {
- dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X "
- "failed: got 0x%08X expected 0x%08X\n",
- E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask));
- *data = E1000_REGISTER(hw, reg);
- return 1;
- }
- }
-
- return 0;
-}
-
-static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data,
- int reg, u32 mask, u32 write)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 val;
- E1000_WRITE_REG(hw, reg, write & mask);
- val = E1000_READ_REG(hw, reg);
- if ((write & mask) != (val & mask)) {
- dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:"
- " got 0x%08X expected 0x%08X\n", reg,
- (val & mask), (write & mask));
- *data = E1000_REGISTER(hw, reg);
- return 1;
- }
-
- return 0;
-}
-
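-/* These helpers wrap reg_pattern_test()/reg_set_and_check() and make the
- * calling function return 1 as soon as a register test fails.
- */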
-#define REG_PATTERN_TEST(reg, mask, write) \
- do { \
- if (reg_pattern_test(adapter, data, reg, mask, write)) \
- return 1; \
- } while (0)
-
-#define REG_SET_AND_CHECK(reg, mask, write) \
- do { \
- if (reg_set_and_check(adapter, data, reg, mask, write)) \
- return 1; \
- } while (0)
-
-static int igb_reg_test(struct igb_adapter *adapter, u64 *data)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct igb_reg_test *test;
- u32 value, before, after;
- u32 i, toggle;
-
- switch (adapter->hw.mac.type) {
- case e1000_i350:
- case e1000_i354:
- test = reg_test_i350;
- toggle = 0x7FEFF3FF;
- break;
- case e1000_i210:
- case e1000_i211:
- test = reg_test_i210;
- toggle = 0x7FEFF3FF;
- break;
- case e1000_82580:
- test = reg_test_82580;
- toggle = 0x7FEFF3FF;
- break;
- case e1000_82576:
- test = reg_test_82576;
- toggle = 0x7FFFF3FF;
- break;
- default:
- test = reg_test_82575;
- toggle = 0x7FFFF3FF;
- break;
- }
-
- /* Because the status register is such a special case,
- * we handle it separately from the rest of the register
- * tests. Some bits are read-only, some toggle, and some
- * are writable on newer MACs.
- */
- before = E1000_READ_REG(hw, E1000_STATUS);
- value = (E1000_READ_REG(hw, E1000_STATUS) & toggle);
- E1000_WRITE_REG(hw, E1000_STATUS, toggle);
- after = E1000_READ_REG(hw, E1000_STATUS) & toggle;
- if (value != after) {
- dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test "
- "got: 0x%08X expected: 0x%08X\n", after, value);
- *data = 1;
- return 1;
- }
- /* restore previous status */
- E1000_WRITE_REG(hw, E1000_STATUS, before);
-
- /* Perform the remainder of the register test, looping through
- * the test table until we either fail or reach the null entry.
- */
- while (test->reg) {
- for (i = 0; i < test->array_len; i++) {
- switch (test->test_type) {
- case PATTERN_TEST:
- REG_PATTERN_TEST(test->reg +
- (i * test->reg_offset),
- test->mask,
- test->write);
- break;
- case SET_READ_TEST:
- REG_SET_AND_CHECK(test->reg +
- (i * test->reg_offset),
- test->mask,
- test->write);
- break;
- case WRITE_NO_TEST:
- writel(test->write,
- (adapter->hw.hw_addr + test->reg)
- + (i * test->reg_offset));
- break;
- case TABLE32_TEST:
- REG_PATTERN_TEST(test->reg + (i * 4),
- test->mask,
- test->write);
- break;
- case TABLE64_TEST_LO:
- REG_PATTERN_TEST(test->reg + (i * 8),
- test->mask,
- test->write);
- break;
- case TABLE64_TEST_HI:
- REG_PATTERN_TEST((test->reg + 4) + (i * 8),
- test->mask,
- test->write);
- break;
- }
- }
- test++;
- }
-
- *data = 0;
- return 0;
-}
-
-static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
-{
- *data = 0;
-
- /* Validate NVM checksum */
- if (e1000_validate_nvm_checksum(&adapter->hw) < 0)
- *data = 2;
-
- return *data;
-}
-
-static irqreturn_t igb_test_intr(int irq, void *data)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
-
- adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR);
-
- return IRQ_HANDLED;
-}
-
-static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- u32 mask, ics_mask, i = 0, shared_int = TRUE;
- u32 irq = adapter->pdev->irq;
-
- *data = 0;
-
- /* Hook up test interrupt handler just for this test */
- if (adapter->msix_entries) {
- if (request_irq(adapter->msix_entries[0].vector,
- &igb_test_intr, 0, netdev->name, adapter)) {
- *data = 1;
- return -1;
- }
- } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
- shared_int = FALSE;
- if (request_irq(irq,
- igb_test_intr, 0, netdev->name, adapter)) {
- *data = 1;
- return -1;
- }
- } else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED,
- netdev->name, adapter)) {
- shared_int = FALSE;
- } else if (request_irq(irq, &igb_test_intr, IRQF_SHARED,
- netdev->name, adapter)) {
- *data = 1;
- return -1;
- }
- dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n",
- (shared_int ? "shared" : "unshared"));
-
- /* Disable all the interrupts */
- E1000_WRITE_REG(hw, E1000_IMC, ~0);
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- /* Define all writable bits for ICS */
- switch (hw->mac.type) {
- case e1000_82575:
- ics_mask = 0x37F47EDD;
- break;
- case e1000_82576:
- ics_mask = 0x77D4FBFD;
- break;
- case e1000_82580:
- ics_mask = 0x77DCFED5;
- break;
- case e1000_i350:
- case e1000_i354:
- ics_mask = 0x77DCFED5;
- break;
- case e1000_i210:
- case e1000_i211:
- ics_mask = 0x774CFED5;
- break;
- default:
- ics_mask = 0x7FFFFFFF;
- break;
- }
-
- /* Test each interrupt */
- for (; i < 31; i++) {
- /* Interrupt to test */
- mask = 1 << i;
-
- if (!(mask & ics_mask))
- continue;
-
- if (!shared_int) {
- /* Disable the interrupt to be reported in
- * the cause register and then force the same
- * interrupt and see if one gets posted. If
- * an interrupt was posted to the bus, the
- * test failed.
- */
- adapter->test_icr = 0;
-
- /* Flush any pending interrupts */
- E1000_WRITE_REG(hw, E1000_ICR, ~0);
-
- E1000_WRITE_REG(hw, E1000_IMC, mask);
- E1000_WRITE_REG(hw, E1000_ICS, mask);
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- if (adapter->test_icr & mask) {
- *data = 3;
- break;
- }
- }
-
- /* Enable the interrupt to be reported in
- * the cause register and then force the same
- * interrupt and see if one gets posted. If
- * an interrupt was not posted to the bus, the
- * test failed.
- */
- adapter->test_icr = 0;
-
- /* Flush any pending interrupts */
- E1000_WRITE_REG(hw, E1000_ICR, ~0);
-
- E1000_WRITE_REG(hw, E1000_IMS, mask);
- E1000_WRITE_REG(hw, E1000_ICS, mask);
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- if (!(adapter->test_icr & mask)) {
- *data = 4;
- break;
- }
-
- if (!shared_int) {
- /* Disable the other interrupts to be reported in
- * the cause register and then force the other
- * interrupts and see if any get posted. If
- * an interrupt was posted to the bus, the
- * test failed.
- */
- adapter->test_icr = 0;
-
- /* Flush any pending interrupts */
- E1000_WRITE_REG(hw, E1000_ICR, ~0);
-
- E1000_WRITE_REG(hw, E1000_IMC, ~mask);
- E1000_WRITE_REG(hw, E1000_ICS, ~mask);
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- if (adapter->test_icr & mask) {
- *data = 5;
- break;
- }
- }
- }
-
- /* Disable all the interrupts */
- E1000_WRITE_REG(hw, E1000_IMC, ~0);
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- /* Unhook test interrupt handler */
- if (adapter->msix_entries)
- free_irq(adapter->msix_entries[0].vector, adapter);
- else
- free_irq(irq, adapter);
-
- return *data;
-}
-
-static void igb_free_desc_rings(struct igb_adapter *adapter)
-{
- igb_free_tx_resources(&adapter->test_tx_ring);
- igb_free_rx_resources(&adapter->test_rx_ring);
-}
-
-static int igb_setup_desc_rings(struct igb_adapter *adapter)
-{
- struct igb_ring *tx_ring = &adapter->test_tx_ring;
- struct igb_ring *rx_ring = &adapter->test_rx_ring;
- struct e1000_hw *hw = &adapter->hw;
- int ret_val;
-
- /* Setup Tx descriptor ring and Tx buffers */
- tx_ring->count = IGB_DEFAULT_TXD;
- tx_ring->dev = pci_dev_to_dev(adapter->pdev);
- tx_ring->netdev = adapter->netdev;
- tx_ring->reg_idx = adapter->vfs_allocated_count;
-
- if (igb_setup_tx_resources(tx_ring)) {
- ret_val = 1;
- goto err_nomem;
- }
-
- igb_setup_tctl(adapter);
- igb_configure_tx_ring(adapter, tx_ring);
-
- /* Setup Rx descriptor ring and Rx buffers */
- rx_ring->count = IGB_DEFAULT_RXD;
- rx_ring->dev = pci_dev_to_dev(adapter->pdev);
- rx_ring->netdev = adapter->netdev;
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- rx_ring->rx_buffer_len = IGB_RX_HDR_LEN;
-#endif
- rx_ring->reg_idx = adapter->vfs_allocated_count;
-
- if (igb_setup_rx_resources(rx_ring)) {
- ret_val = 2;
- goto err_nomem;
- }
-
- /* set the default queue to queue 0 of PF */
- E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3);
-
- /* enable receive ring */
- igb_setup_rctl(adapter);
- igb_configure_rx_ring(adapter, rx_ring);
-
- igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
-
- return 0;
-
-err_nomem:
- igb_free_desc_rings(adapter);
- return ret_val;
-}
-
-static void igb_phy_disable_receiver(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- /* Write out to PHY registers 29 and 30 to disable the Receiver. */
- e1000_write_phy_reg(hw, 29, 0x001F);
- e1000_write_phy_reg(hw, 30, 0x8FFC);
- e1000_write_phy_reg(hw, 29, 0x001A);
- e1000_write_phy_reg(hw, 30, 0x8FF0);
-}
-
-static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl_reg = 0;
-
- hw->mac.autoneg = FALSE;
-
- if (hw->phy.type == e1000_phy_m88) {
- if (hw->phy.id != I210_I_PHY_ID) {
- /* Auto-MDI/MDIX Off */
- e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
- /* reset to update Auto-MDI/MDIX */
- e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140);
- /* autoneg off */
- e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140);
- } else {
- /* force 1000, set loopback */
- e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
- e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
- }
- } else {
- /* enable MII loopback */
- if (hw->phy.type == e1000_phy_82580)
- e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041);
- }
-
- /* force 1000, set loopback */
- e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
-
- /* Now set up the MAC to the same speed/duplex as the PHY. */
- ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
- ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
- ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
- E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
- E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
- E1000_CTRL_FD | /* Force Duplex to FULL */
- E1000_CTRL_SLU); /* Set link up enable bit */
-
- if (hw->phy.type == e1000_phy_m88)
- ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
-
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
-
- /* Disable the receiver on the PHY so that it does not begin to
- * autonegotiate when a cable is reconnected to the NIC.
- */
- if (hw->phy.type == e1000_phy_m88)
- igb_phy_disable_receiver(adapter);
-
- mdelay(500);
- return 0;
-}
-
-static int igb_set_phy_loopback(struct igb_adapter *adapter)
-{
- return igb_integrated_phy_loopback(adapter);
-}
-
-static int igb_setup_loopback_test(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 reg;
-
- reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
-
- /* use CTRL_EXT to identify link type as SGMII can appear as copper */
- if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) {
- if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
-
- /* Enable DH89xxCC MPHY for near end loopback */
- reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
- reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
- E1000_MPHY_PCS_CLK_REG_OFFSET;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
-
- reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
- reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
- E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
- }
-
- reg = E1000_READ_REG(hw, E1000_RCTL);
- reg |= E1000_RCTL_LBM_TCVR;
- E1000_WRITE_REG(hw, E1000_RCTL, reg);
-
- E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK);
-
- reg = E1000_READ_REG(hw, E1000_CTRL);
- reg &= ~(E1000_CTRL_RFCE |
- E1000_CTRL_TFCE |
- E1000_CTRL_LRST);
- reg |= E1000_CTRL_SLU |
- E1000_CTRL_FD;
- E1000_WRITE_REG(hw, E1000_CTRL, reg);
-
- /* Unset switch control to serdes energy detect */
- reg = E1000_READ_REG(hw, E1000_CONNSW);
- reg &= ~E1000_CONNSW_ENRGSRC;
- E1000_WRITE_REG(hw, E1000_CONNSW, reg);
-
- /* Unset sigdetect for SERDES loopback on
- * 82580 and newer devices
- */
- if (hw->mac.type >= e1000_82580) {
- reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
- reg |= E1000_PCS_CFG_IGN_SD;
- E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
- }
-
- /* Set PCS register for forced speed */
- reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
- reg &= ~E1000_PCS_LCTL_AN_ENABLE; /* Disable Autoneg*/
- reg |= E1000_PCS_LCTL_FLV_LINK_UP | /* Force link up */
- E1000_PCS_LCTL_FSV_1000 | /* Force 1000 */
- E1000_PCS_LCTL_FDV_FULL | /* SerDes Full duplex */
- E1000_PCS_LCTL_FSD | /* Force Speed */
- E1000_PCS_LCTL_FORCE_LINK; /* Force Link */
- E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
-
- return 0;
- }
-
- return igb_set_phy_loopback(adapter);
-}
-
-static void igb_loopback_cleanup(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 rctl;
- u16 phy_reg;
-
- if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
- (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
- u32 reg;
-
- /* Disable near end loopback on DH89xxCC */
- reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
- reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
- E1000_MPHY_PCS_CLK_REG_OFFSET;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
-
- reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
- reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
- E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
- }
-
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
-
- hw->mac.autoneg = TRUE;
- e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg);
- if (phy_reg & MII_CR_LOOPBACK) {
- phy_reg &= ~MII_CR_LOOPBACK;
- if (hw->phy.id == I210_I_PHY_ID)
- e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
- e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg);
- e1000_phy_commit(hw);
- }
-}
-
-static void igb_create_lbtest_frame(struct sk_buff *skb,
- unsigned int frame_size)
-{
- memset(skb->data, 0xFF, frame_size);
- frame_size /= 2;
- memset(&skb->data[frame_size], 0xAA, frame_size - 1);
- memset(&skb->data[frame_size + 10], 0xBE, 1);
- memset(&skb->data[frame_size + 12], 0xAF, 1);
-}
-
-static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
- unsigned int frame_size)
-{
- unsigned char *data;
- bool match = true;
-
- frame_size >>= 1;
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- data = rx_buffer->skb->data;
-#else
- data = kmap(rx_buffer->page);
-#endif
-
- if (data[3] != 0xFF ||
- data[frame_size + 10] != 0xBE ||
- data[frame_size + 12] != 0xAF)
- match = false;
-
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- kunmap(rx_buffer->page);
-
-#endif
- return match;
-}
-
-static u16 igb_clean_test_rings(struct igb_ring *rx_ring,
- struct igb_ring *tx_ring,
- unsigned int size)
-{
- union e1000_adv_rx_desc *rx_desc;
- struct igb_rx_buffer *rx_buffer_info;
- struct igb_tx_buffer *tx_buffer_info;
- u16 rx_ntc, tx_ntc, count = 0;
-
- /* initialize next to clean and descriptor values */
- rx_ntc = rx_ring->next_to_clean;
- tx_ntc = tx_ring->next_to_clean;
- rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
-
- while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
- /* check rx buffer */
- rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
-
- /* sync Rx buffer for CPU read */
- dma_sync_single_for_cpu(rx_ring->dev,
- rx_buffer_info->dma,
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- IGB_RX_HDR_LEN,
-#else
- IGB_RX_BUFSZ,
-#endif
- DMA_FROM_DEVICE);
-
- /* verify contents of skb */
- if (igb_check_lbtest_frame(rx_buffer_info, size))
- count++;
-
- /* sync Rx buffer for device write */
- dma_sync_single_for_device(rx_ring->dev,
- rx_buffer_info->dma,
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- IGB_RX_HDR_LEN,
-#else
- IGB_RX_BUFSZ,
-#endif
- DMA_FROM_DEVICE);
-
- /* unmap buffer on tx side */
- tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
- igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
-
- /* increment rx/tx next to clean counters */
- rx_ntc++;
- if (rx_ntc == rx_ring->count)
- rx_ntc = 0;
- tx_ntc++;
- if (tx_ntc == tx_ring->count)
- tx_ntc = 0;
-
- /* fetch next descriptor */
- rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
- }
-
- /* re-map buffers to ring, store next to clean values */
- igb_alloc_rx_buffers(rx_ring, count);
- rx_ring->next_to_clean = rx_ntc;
- tx_ring->next_to_clean = tx_ntc;
-
- return count;
-}
-
-static int igb_run_loopback_test(struct igb_adapter *adapter)
-{
- struct igb_ring *tx_ring = &adapter->test_tx_ring;
- struct igb_ring *rx_ring = &adapter->test_rx_ring;
- u16 i, j, lc, good_cnt;
- int ret_val = 0;
- unsigned int size = IGB_RX_HDR_LEN;
- netdev_tx_t tx_ret_val;
- struct sk_buff *skb;
-
- /* allocate test skb */
- skb = alloc_skb(size, GFP_KERNEL);
- if (!skb)
- return 11;
-
- /* place data into test skb */
- igb_create_lbtest_frame(skb, size);
- skb_put(skb, size);
-
- /*
- * Calculate the loop count based on the largest descriptor ring
- * The idea is to wrap the largest ring a number of times using 64
- * send/receive pairs during each loop
- */
-
- if (rx_ring->count <= tx_ring->count)
- lc = ((tx_ring->count / 64) * 2) + 1;
- else
- lc = ((rx_ring->count / 64) * 2) + 1;
-
- for (j = 0; j <= lc; j++) { /* loop count loop */
- /* reset count of good packets */
- good_cnt = 0;
-
- /* place 64 packets on the transmit queue*/
- for (i = 0; i < 64; i++) {
- skb_get(skb);
- tx_ret_val = igb_xmit_frame_ring(skb, tx_ring);
- if (tx_ret_val == NETDEV_TX_OK)
- good_cnt++;
- }
-
- if (good_cnt != 64) {
- ret_val = 12;
- break;
- }
-
- /* allow 200 milliseconds for packets to go from tx to rx */
- msleep(200);
-
- good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size);
- if (good_cnt != 64) {
- ret_val = 13;
- break;
- }
- } /* end loop count loop */
-
- /* free the original skb */
- kfree_skb(skb);
-
- return ret_val;
-}
-
-static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
-{
- /* PHY loopback cannot be performed if SoL/IDER
- * sessions are active */
- if (e1000_check_reset_block(&adapter->hw)) {
- dev_err(pci_dev_to_dev(adapter->pdev),
- "Cannot do PHY loopback test "
- "when SoL/IDER is active.\n");
- *data = 0;
- goto out;
- }
- if (adapter->hw.mac.type == e1000_i354) {
- dev_info(&adapter->pdev->dev,
- "Loopback test not supported on i354.\n");
- *data = 0;
- goto out;
- }
- *data = igb_setup_desc_rings(adapter);
- if (*data)
- goto out;
- *data = igb_setup_loopback_test(adapter);
- if (*data)
- goto err_loopback;
- *data = igb_run_loopback_test(adapter);
-
- igb_loopback_cleanup(adapter);
-
-err_loopback:
- igb_free_desc_rings(adapter);
-out:
- return *data;
-}
-
-static int igb_link_test(struct igb_adapter *adapter, u64 *data)
-{
- u32 link;
- int i, time;
-
- *data = 0;
- time = 0;
- if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
- int i = 0;
- adapter->hw.mac.serdes_has_link = FALSE;
-
- /* On some blade server designs, link establishment
- * could take as long as 2-3 minutes */
- do {
- e1000_check_for_link(&adapter->hw);
- if (adapter->hw.mac.serdes_has_link)
- goto out;
- msleep(20);
- } while (i++ < 3750);
-
- *data = 1;
- } else {
- for (i = 0; i < IGB_MAX_LINK_TRIES; i++) {
- link = igb_has_link(adapter);
- if (link)
- goto out;
- else {
- time++;
- msleep(1000);
- }
- }
- if (!link)
- *data = 1;
- }
- out:
- return *data;
-}
-
-static void igb_diag_test(struct net_device *netdev,
- struct ethtool_test *eth_test, u64 *data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- u16 autoneg_advertised;
- u8 forced_speed_duplex, autoneg;
- bool if_running = netif_running(netdev);
-
- set_bit(__IGB_TESTING, &adapter->state);
- if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
- /* Offline tests */
-
- /* save speed, duplex, autoneg settings */
- autoneg_advertised = adapter->hw.phy.autoneg_advertised;
- forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
- autoneg = adapter->hw.mac.autoneg;
-
- dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n");
-
- /* power up link for link test */
- igb_power_up_link(adapter);
-
- /* Link test performed before hardware reset so autoneg doesn't
- * interfere with test result */
- if (igb_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- if (if_running)
- /* indicate we're in test mode */
- dev_close(netdev);
- else
- igb_reset(adapter);
-
- if (igb_reg_test(adapter, &data[0]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- igb_reset(adapter);
- if (igb_eeprom_test(adapter, &data[1]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- igb_reset(adapter);
- if (igb_intr_test(adapter, &data[2]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- igb_reset(adapter);
-
- /* power up link for loopback test */
- igb_power_up_link(adapter);
-
- if (igb_loopback_test(adapter, &data[3]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- /* restore speed, duplex, autoneg settings */
- adapter->hw.phy.autoneg_advertised = autoneg_advertised;
- adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
- adapter->hw.mac.autoneg = autoneg;
-
- /* force this routine to wait until autoneg complete/timeout */
- adapter->hw.phy.autoneg_wait_to_complete = TRUE;
- igb_reset(adapter);
- adapter->hw.phy.autoneg_wait_to_complete = FALSE;
-
- clear_bit(__IGB_TESTING, &adapter->state);
- if (if_running)
- dev_open(netdev);
- } else {
- dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n");
-
- /* PHY is powered down when interface is down */
- if (if_running && igb_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
- else
- data[4] = 0;
-
- /* Online tests aren't run; pass by default */
- data[0] = 0;
- data[1] = 0;
- data[2] = 0;
- data[3] = 0;
-
- clear_bit(__IGB_TESTING, &adapter->state);
- }
- msleep_interruptible(4 * 1000);
-}
-
-static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- wol->supported = WAKE_UCAST | WAKE_MCAST |
- WAKE_BCAST | WAKE_MAGIC |
- WAKE_PHY;
- wol->wolopts = 0;
-
- if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
- return;
-
- /* apply any specific unsupported masks here */
- switch (adapter->hw.device_id) {
- default:
- break;
- }
-
- if (adapter->wol & E1000_WUFC_EX)
- wol->wolopts |= WAKE_UCAST;
- if (adapter->wol & E1000_WUFC_MC)
- wol->wolopts |= WAKE_MCAST;
- if (adapter->wol & E1000_WUFC_BC)
- wol->wolopts |= WAKE_BCAST;
- if (adapter->wol & E1000_WUFC_MAG)
- wol->wolopts |= WAKE_MAGIC;
- if (adapter->wol & E1000_WUFC_LNKC)
- wol->wolopts |= WAKE_PHY;
-}
-
-static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE))
- return -EOPNOTSUPP;
-
- if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
- return wol->wolopts ? -EOPNOTSUPP : 0;
-
- /* these settings will always override what we currently have */
- adapter->wol = 0;
-
- if (wol->wolopts & WAKE_UCAST)
- adapter->wol |= E1000_WUFC_EX;
- if (wol->wolopts & WAKE_MCAST)
- adapter->wol |= E1000_WUFC_MC;
- if (wol->wolopts & WAKE_BCAST)
- adapter->wol |= E1000_WUFC_BC;
- if (wol->wolopts & WAKE_MAGIC)
- adapter->wol |= E1000_WUFC_MAG;
- if (wol->wolopts & WAKE_PHY)
- adapter->wol |= E1000_WUFC_LNKC;
- device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
-
- return 0;
-}
-
-/* bit defines for adapter->led_status */
-#ifdef HAVE_ETHTOOL_SET_PHYS_ID
-static int igb_set_phys_id(struct net_device *netdev,
- enum ethtool_phys_id_state state)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-
- switch (state) {
- case ETHTOOL_ID_ACTIVE:
- e1000_blink_led(hw);
- return 2;
- case ETHTOOL_ID_ON:
- e1000_led_on(hw);
- break;
- case ETHTOOL_ID_OFF:
- e1000_led_off(hw);
- break;
- case ETHTOOL_ID_INACTIVE:
- e1000_led_off(hw);
- e1000_cleanup_led(hw);
- break;
- }
-
- return 0;
-}
-#else
-static int igb_phys_id(struct net_device *netdev, u32 data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- unsigned long timeout;
-
- timeout = data * 1000;
-
- /*
- * msleep_interruptible only accepts an unsigned int, so we are limited
- * in how long a duration we can wait
- */
- if (!timeout || timeout > UINT_MAX)
- timeout = UINT_MAX;
-
- e1000_blink_led(hw);
- msleep_interruptible(timeout);
-
- e1000_led_off(hw);
- e1000_cleanup_led(hw);
-
- return 0;
-}
-#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
-
-static int igb_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- int i;
-
- if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
- ((ec->rx_coalesce_usecs > 3) &&
- (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
-	    (ec->rx_coalesce_usecs == 2)) {
-		printk(KERN_ERR "igb: set_coalesce: invalid rx-usecs value\n");
- return -EINVAL;
- }
-
- if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
- ((ec->tx_coalesce_usecs > 3) &&
- (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
- (ec->tx_coalesce_usecs == 2))
- return -EINVAL;
-
- if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
- return -EINVAL;
-
- if (ec->tx_max_coalesced_frames_irq)
- adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
-
- /* If ITR is disabled, disable DMAC */
- if (ec->rx_coalesce_usecs == 0) {
- adapter->dmac = IGB_DMAC_DISABLE;
- }
-
-	/* store the rx coalescing value in its internal ITR representation */
- if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
- adapter->rx_itr_setting = ec->rx_coalesce_usecs;
- else
- adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
-
-	/* do the same for tx, or mirror rx when queue pairs are enabled */
- if (adapter->flags & IGB_FLAG_QUEUE_PAIRS)
- adapter->tx_itr_setting = adapter->rx_itr_setting;
- else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
- adapter->tx_itr_setting = ec->tx_coalesce_usecs;
- else
- adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
-
- for (i = 0; i < adapter->num_q_vectors; i++) {
- struct igb_q_vector *q_vector = adapter->q_vector[i];
- q_vector->tx.work_limit = adapter->tx_work_limit;
- if (q_vector->rx.ring)
- q_vector->itr_val = adapter->rx_itr_setting;
- else
- q_vector->itr_val = adapter->tx_itr_setting;
- if (q_vector->itr_val && q_vector->itr_val <= 3)
- q_vector->itr_val = IGB_START_ITR;
- q_vector->set_itr = 1;
- }
-
- return 0;
-}
-
-static int igb_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (adapter->rx_itr_setting <= 3)
- ec->rx_coalesce_usecs = adapter->rx_itr_setting;
- else
- ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
-
- ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
-
- if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
- if (adapter->tx_itr_setting <= 3)
- ec->tx_coalesce_usecs = adapter->tx_itr_setting;
- else
- ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
- }
-
- return 0;
-}
-
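-/*
- * Rough illustration of the coalescing encoding used by the two handlers
- * above (assuming the stock "ethtool -C/-c" interface): a request such as
- * "ethtool -C <iface> rx-usecs 100" stores 100 << 2 = 400 in
- * rx_itr_setting and is reported back as 400 >> 2 = 100; the special
- * values 1-3 are kept as-is and make each vector start from IGB_START_ITR
- * rather than a fixed interval.
- */
-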
-static int igb_nway_reset(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- if (netif_running(netdev))
- igb_reinit_locked(adapter);
- return 0;
-}
-
-#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
-static int igb_get_sset_count(struct net_device *netdev, int sset)
-{
- switch (sset) {
- case ETH_SS_STATS:
- return IGB_STATS_LEN;
- case ETH_SS_TEST:
- return IGB_TEST_LEN;
- default:
- return -ENOTSUPP;
- }
-}
-#else
-static int igb_get_stats_count(struct net_device *netdev)
-{
- return IGB_STATS_LEN;
-}
-
-static int igb_diag_test_count(struct net_device *netdev)
-{
- return IGB_TEST_LEN;
-}
-#endif
-
-static void igb_get_ethtool_stats(struct net_device *netdev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats *net_stats = &netdev->stats;
-#else
- struct net_device_stats *net_stats = &adapter->net_stats;
-#endif
- u64 *queue_stat;
- int i, j, k;
- char *p;
-
- igb_update_stats(adapter);
-
- for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
- p = (char *)adapter + igb_gstrings_stats[i].stat_offset;
- data[i] = (igb_gstrings_stats[i].sizeof_stat ==
- sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
- }
- for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) {
- p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset;
- data[i] = (igb_gstrings_net_stats[j].sizeof_stat ==
- sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
- }
- for (j = 0; j < adapter->num_tx_queues; j++) {
- queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats;
- for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++)
- data[i] = queue_stat[k];
- }
- for (j = 0; j < adapter->num_rx_queues; j++) {
- queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats;
- for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++)
- data[i] = queue_stat[k];
- }
-}
-
-static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- u8 *p = data;
- int i;
-
- switch (stringset) {
- case ETH_SS_TEST:
- memcpy(data, *igb_gstrings_test,
- IGB_TEST_LEN*ETH_GSTRING_LEN);
- break;
- case ETH_SS_STATS:
- for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
- memcpy(p, igb_gstrings_stats[i].stat_string,
- ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) {
- memcpy(p, igb_gstrings_net_stats[i].stat_string,
- ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < adapter->num_tx_queues; i++) {
- sprintf(p, "tx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "tx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "tx_queue_%u_restart", i);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- sprintf(p, "rx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_drops", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_csum_err", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_alloc_failed", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_ipv4_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_ipv4e_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_ipv6_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_ipv6e_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_tcp_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_udp_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_sctp_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_nfs_packets", i);
- p += ETH_GSTRING_LEN;
- }
-/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
- break;
- }
-}
-
-#ifdef HAVE_ETHTOOL_GET_TS_INFO
-static int igb_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *info)
-{
- struct igb_adapter *adapter = netdev_priv(dev);
-
- switch (adapter->hw.mac.type) {
-#ifdef HAVE_PTP_1588_CLOCK
- case e1000_82575:
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- return 0;
- case e1000_82576:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE;
-
- if (adapter->ptp_clock)
- info->phc_index = ptp_clock_index(adapter->ptp_clock);
- else
- info->phc_index = -1;
-
- info->tx_types =
- (1 << HWTSTAMP_TX_OFF) |
- (1 << HWTSTAMP_TX_ON);
-
- info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
-
- /* 82576 does not support timestamping all packets. */
- if (adapter->hw.mac.type >= e1000_82580)
- info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL;
- else
- info->rx_filters |=
- (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
- (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
- (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
-
- return 0;
-#endif /* HAVE_PTP_1588_CLOCK */
- default:
- return -EOPNOTSUPP;
- }
-}
-#endif /* HAVE_ETHTOOL_GET_TS_INFO */
-
-#ifdef CONFIG_PM_RUNTIME
-static int igb_ethtool_begin(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- pm_runtime_get_sync(&adapter->pdev->dev);
-
- return 0;
-}
-
-static void igb_ethtool_complete(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- pm_runtime_put(&adapter->pdev->dev);
-}
-#endif /* CONFIG_PM_RUNTIME */
-
-#ifndef HAVE_NDO_SET_FEATURES
-static u32 igb_get_rx_csum(struct net_device *netdev)
-{
- return !!(netdev->features & NETIF_F_RXCSUM);
-}
-
-static int igb_set_rx_csum(struct net_device *netdev, u32 data)
-{
- const u32 feature_list = NETIF_F_RXCSUM;
-
- if (data)
- netdev->features |= feature_list;
- else
- netdev->features &= ~feature_list;
-
- return 0;
-}
-
-static int igb_set_tx_csum(struct net_device *netdev, u32 data)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-#ifdef NETIF_F_IPV6_CSUM
- u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-#else
- u32 feature_list = NETIF_F_IP_CSUM;
-#endif
-
- if (adapter->hw.mac.type >= e1000_82576)
- feature_list |= NETIF_F_SCTP_CSUM;
-
- if (data)
- netdev->features |= feature_list;
- else
- netdev->features &= ~feature_list;
-
- return 0;
-}
-
-#ifdef NETIF_F_TSO
-static int igb_set_tso(struct net_device *netdev, u32 data)
-{
-#ifdef NETIF_F_TSO6
- const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6;
-#else
- const u32 feature_list = NETIF_F_TSO;
-#endif
-
- if (data)
- netdev->features |= feature_list;
- else
- netdev->features &= ~feature_list;
-
-#ifndef HAVE_NETDEV_VLAN_FEATURES
- if (!data) {
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct net_device *v_netdev;
- int i;
-
- /* disable TSO on all VLANs if they're present */
- if (!adapter->vlgrp)
- goto tso_out;
-
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
- v_netdev = vlan_group_get_device(adapter->vlgrp, i);
- if (!v_netdev)
- continue;
-
- v_netdev->features &= ~feature_list;
- vlan_group_set_device(adapter->vlgrp, i, v_netdev);
- }
- }
-
-tso_out:
-
-#endif /* HAVE_NETDEV_VLAN_FEATURES */
- return 0;
-}
-
-#endif /* NETIF_F_TSO */
-#ifdef ETHTOOL_GFLAGS
-static int igb_set_flags(struct net_device *netdev, u32 data)
-{
- u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN |
- ETH_FLAG_RXHASH;
-#ifndef HAVE_VLAN_RX_REGISTER
- u32 changed = netdev->features ^ data;
-#endif
- int rc;
-#ifndef IGB_NO_LRO
-
- supported_flags |= ETH_FLAG_LRO;
-#endif
- /*
- * Since there is no support for separate tx vlan accel
- * enabled make sure tx flag is cleared if rx is.
- */
- if (!(data & ETH_FLAG_RXVLAN))
- data &= ~ETH_FLAG_TXVLAN;
-
- rc = ethtool_op_set_flags(netdev, data, supported_flags);
- if (rc)
- return rc;
-#ifndef HAVE_VLAN_RX_REGISTER
-
- if (changed & ETH_FLAG_RXVLAN)
- igb_vlan_mode(netdev, data);
-#endif
-
- return 0;
-}
-
-#endif /* ETHTOOL_GFLAGS */
-#endif /* HAVE_NDO_SET_FEATURES */
-#ifdef ETHTOOL_SADV_COAL
-static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- switch (edata->data) {
- case IGB_DMAC_DISABLE:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_MIN:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_500:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_EN_DEFAULT:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_2000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_3000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_4000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_5000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_6000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_7000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_8000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_9000:
- adapter->dmac = edata->data;
- break;
- case IGB_DMAC_MAX:
- adapter->dmac = edata->data;
- break;
- default:
- adapter->dmac = IGB_DMAC_DISABLE;
-		printk(KERN_WARNING "set_dmac: invalid setting, setting DMAC to %d\n",
- adapter->dmac);
- }
-	printk(KERN_INFO "%s: setting DMAC to %d\n", netdev->name, adapter->dmac);
- return 0;
-}
-#endif /* ETHTOOL_SADV_COAL */
-#ifdef ETHTOOL_GADV_COAL
-static void igb_get_dmac(struct net_device *netdev,
- struct ethtool_value *edata)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- edata->data = adapter->dmac;
-
- return;
-}
-#endif
-
-#ifdef ETHTOOL_GEEE
-static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 ret_val;
- u16 phy_data;
-
- if ((hw->mac.type < e1000_i350) ||
- (hw->phy.media_type != e1000_media_type_copper))
- return -EOPNOTSUPP;
-
- edata->supported = (SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full);
-
- if (!hw->dev_spec._82575.eee_disable)
- edata->advertised =
- mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
-
- /* The IPCNFG and EEER registers are not supported on I354. */
- if (hw->mac.type == e1000_i354) {
- e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active);
- } else {
- u32 eeer;
-
- eeer = E1000_READ_REG(hw, E1000_EEER);
-
- /* EEE status on negotiated link */
- if (eeer & E1000_EEER_EEE_NEG)
- edata->eee_active = true;
-
- if (eeer & E1000_EEER_TX_LPI_EN)
- edata->tx_lpi_enabled = true;
- }
-
- /* EEE Link Partner Advertised */
- switch (hw->mac.type) {
- case e1000_i350:
- ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350,
- &phy_data);
- if (ret_val)
- return -ENODATA;
-
- edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
-
- break;
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210,
- E1000_EEE_LP_ADV_DEV_I210,
- &phy_data);
- if (ret_val)
- return -ENODATA;
-
- edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
-
- break;
- default:
- break;
- }
-
- edata->eee_enabled = !hw->dev_spec._82575.eee_disable;
-
- if ((hw->mac.type == e1000_i354) &&
- (edata->eee_enabled))
- edata->tx_lpi_enabled = true;
-
- /*
- * report correct negotiated EEE status for devices that
- * wrongly report EEE at half-duplex
- */
- if (adapter->link_duplex == HALF_DUPLEX) {
- edata->eee_enabled = false;
- edata->eee_active = false;
- edata->tx_lpi_enabled = false;
-		edata->advertised = 0;
- }
-
- return 0;
-}
-#endif
-
-#ifdef ETHTOOL_SEEE
-static int igb_set_eee(struct net_device *netdev,
- struct ethtool_eee *edata)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- struct ethtool_eee eee_curr;
- s32 ret_val;
-
- if ((hw->mac.type < e1000_i350) ||
- (hw->phy.media_type != e1000_media_type_copper))
- return -EOPNOTSUPP;
-
- ret_val = igb_get_eee(netdev, &eee_curr);
- if (ret_val)
- return ret_val;
-
- if (eee_curr.eee_enabled) {
- if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
- dev_err(pci_dev_to_dev(adapter->pdev),
- "Setting EEE tx-lpi is not supported\n");
- return -EINVAL;
- }
-
- /* Tx LPI time is not implemented currently */
- if (edata->tx_lpi_timer) {
- dev_err(pci_dev_to_dev(adapter->pdev),
- "Setting EEE Tx LPI timer is not supported\n");
- return -EINVAL;
- }
-
- if (edata->advertised &
- ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
- dev_err(pci_dev_to_dev(adapter->pdev),
-				"EEE Advertisement supports only 100Tx and/or 1000T full duplex\n");
- return -EINVAL;
- }
-
- } else if (!edata->eee_enabled) {
- dev_err(pci_dev_to_dev(adapter->pdev),
- "Setting EEE options is not supported with EEE disabled\n");
- return -EINVAL;
- }
-
- adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
-
- if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
- hw->dev_spec._82575.eee_disable = !edata->eee_enabled;
-
- /* reset link */
- if (netif_running(netdev))
- igb_reinit_locked(adapter);
- else
- igb_reset(adapter);
- }
-
- return 0;
-}
-#endif /* ETHTOOL_SEEE */
-
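-/*
- * The two handlers above back the generic EEE ethtool operations.  As a
- * rough illustration (stock ethtool CLI assumed): "ethtool --show-eee
- * <iface>" lands in igb_get_eee(), and "ethtool --set-eee <iface> eee on"
- * reaches igb_set_eee(), which updates adapter->eee_advert, flips
- * dev_spec._82575.eee_disable when the enable state changes and resets the
- * link so the new advertisement is negotiated.
- */
-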
-#ifdef ETHTOOL_GRXRINGS
-static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
- struct ethtool_rxnfc *cmd)
-{
- cmd->data = 0;
-
- /* Report default options for RSS on igb */
- switch (cmd->flow_type) {
- case TCP_V4_FLOW:
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
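-		/* fall through */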
- case UDP_V4_FLOW:
- if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
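-		/* fall through */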
- case SCTP_V4_FLOW:
- case AH_ESP_V4_FLOW:
- case AH_V4_FLOW:
- case ESP_V4_FLOW:
- case IPV4_FLOW:
- cmd->data |= RXH_IP_SRC | RXH_IP_DST;
- break;
- case TCP_V6_FLOW:
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
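-		/* fall through */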
- case UDP_V6_FLOW:
- if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
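-		/* fall through */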
- case SCTP_V6_FLOW:
- case AH_ESP_V6_FLOW:
- case AH_V6_FLOW:
- case ESP_V6_FLOW:
- case IPV6_FLOW:
- cmd->data |= RXH_IP_SRC | RXH_IP_DST;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
-#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
- void *rule_locs)
-#else
- u32 *rule_locs)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (cmd->cmd) {
- case ETHTOOL_GRXRINGS:
- cmd->data = adapter->num_rx_queues;
- ret = 0;
- break;
- case ETHTOOL_GRXFH:
- ret = igb_get_rss_hash_opts(adapter, cmd);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \
- IGB_FLAG_RSS_FIELD_IPV6_UDP)
-static int igb_set_rss_hash_opt(struct igb_adapter *adapter,
- struct ethtool_rxnfc *nfc)
-{
- u32 flags = adapter->flags;
-
- /*
- * RSS does not support anything other than hashing
- * to queues on src and dst IPs and ports
- */
- if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
- RXH_L4_B_0_1 | RXH_L4_B_2_3))
- return -EINVAL;
-
- switch (nfc->flow_type) {
- case TCP_V4_FLOW:
- case TCP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST) ||
- !(nfc->data & RXH_L4_B_0_1) ||
- !(nfc->data & RXH_L4_B_2_3))
- return -EINVAL;
- break;
- case UDP_V4_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST))
- return -EINVAL;
- switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- case 0:
- flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP;
- break;
- case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
- flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP;
- break;
- default:
- return -EINVAL;
- }
- break;
- case UDP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST))
- return -EINVAL;
- switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- case 0:
- flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP;
- break;
- case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
- flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP;
- break;
- default:
- return -EINVAL;
- }
- break;
- case AH_ESP_V4_FLOW:
- case AH_V4_FLOW:
- case ESP_V4_FLOW:
- case SCTP_V4_FLOW:
- case AH_ESP_V6_FLOW:
- case AH_V6_FLOW:
- case ESP_V6_FLOW:
- case SCTP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST) ||
- (nfc->data & RXH_L4_B_0_1) ||
- (nfc->data & RXH_L4_B_2_3))
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- /* if we changed something we need to update flags */
- if (flags != adapter->flags) {
- struct e1000_hw *hw = &adapter->hw;
- u32 mrqc = E1000_READ_REG(hw, E1000_MRQC);
-
- if ((flags & UDP_RSS_FLAGS) &&
- !(adapter->flags & UDP_RSS_FLAGS))
- DPRINTK(DRV, WARNING,
- "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
-
- adapter->flags = flags;
-
- /* Perform hash on these packet types */
- mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
- E1000_MRQC_RSS_FIELD_IPV4_TCP |
- E1000_MRQC_RSS_FIELD_IPV6 |
- E1000_MRQC_RSS_FIELD_IPV6_TCP;
-
- mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP |
- E1000_MRQC_RSS_FIELD_IPV6_UDP);
-
- if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
- mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
-
- if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
- mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
-
- E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
- }
-
- return 0;
-}
-
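-/*
- * Illustration of the flow-hash control handled above (stock ethtool CLI
- * assumed): "ethtool -N <iface> rx-flow-hash udp4 sdfn" requests
- * RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3 for UDP/IPv4,
- * which sets IGB_FLAG_RSS_FIELD_IPV4_UDP and programs
- * E1000_MRQC_RSS_FIELD_IPV4_UDP; the warning above is printed because
- * fragmented UDP datagrams may then be hashed to different queues and
- * arrive out of order.
- */
-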
-static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (cmd->cmd) {
- case ETHTOOL_SRXFH:
- ret = igb_set_rss_hash_opt(adapter, cmd);
- break;
- default:
- break;
- }
-
- return ret;
-}
-#endif /* ETHTOOL_GRXRINGS */
-
-static const struct ethtool_ops igb_ethtool_ops = {
- .get_settings = igb_get_settings,
- .set_settings = igb_set_settings,
- .get_drvinfo = igb_get_drvinfo,
- .get_regs_len = igb_get_regs_len,
- .get_regs = igb_get_regs,
- .get_wol = igb_get_wol,
- .set_wol = igb_set_wol,
- .get_msglevel = igb_get_msglevel,
- .set_msglevel = igb_set_msglevel,
- .nway_reset = igb_nway_reset,
- .get_link = igb_get_link,
- .get_eeprom_len = igb_get_eeprom_len,
- .get_eeprom = igb_get_eeprom,
- .set_eeprom = igb_set_eeprom,
- .get_ringparam = igb_get_ringparam,
- .set_ringparam = igb_set_ringparam,
- .get_pauseparam = igb_get_pauseparam,
- .set_pauseparam = igb_set_pauseparam,
- .self_test = igb_diag_test,
- .get_strings = igb_get_strings,
-#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
-#ifdef HAVE_ETHTOOL_SET_PHYS_ID
- .set_phys_id = igb_set_phys_id,
-#else
- .phys_id = igb_phys_id,
-#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
-#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
-#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
- .get_sset_count = igb_get_sset_count,
-#else
- .get_stats_count = igb_get_stats_count,
- .self_test_count = igb_diag_test_count,
-#endif
- .get_ethtool_stats = igb_get_ethtool_stats,
-#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
- .get_perm_addr = ethtool_op_get_perm_addr,
-#endif
- .get_coalesce = igb_get_coalesce,
- .set_coalesce = igb_set_coalesce,
-#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
-#ifdef HAVE_ETHTOOL_GET_TS_INFO
- .get_ts_info = igb_get_ts_info,
-#endif /* HAVE_ETHTOOL_GET_TS_INFO */
-#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
-#ifdef CONFIG_PM_RUNTIME
- .begin = igb_ethtool_begin,
- .complete = igb_ethtool_complete,
-#endif /* CONFIG_PM_RUNTIME */
-#ifndef HAVE_NDO_SET_FEATURES
- .get_rx_csum = igb_get_rx_csum,
- .set_rx_csum = igb_set_rx_csum,
- .get_tx_csum = ethtool_op_get_tx_csum,
- .set_tx_csum = igb_set_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .set_sg = ethtool_op_set_sg,
-#ifdef NETIF_F_TSO
- .get_tso = ethtool_op_get_tso,
- .set_tso = igb_set_tso,
-#endif
-#ifdef ETHTOOL_GFLAGS
- .get_flags = ethtool_op_get_flags,
- .set_flags = igb_set_flags,
-#endif /* ETHTOOL_GFLAGS */
-#endif /* HAVE_NDO_SET_FEATURES */
-#ifdef ETHTOOL_GADV_COAL
- .get_advcoal = igb_get_adv_coal,
- .set_advcoal = igb_set_dmac_coal,
-#endif /* ETHTOOL_GADV_COAL */
-#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
-#ifdef ETHTOOL_GEEE
- .get_eee = igb_get_eee,
-#endif
-#ifdef ETHTOOL_SEEE
- .set_eee = igb_set_eee,
-#endif
-#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
-#ifdef ETHTOOL_GRXRINGS
- .get_rxnfc = igb_get_rxnfc,
- .set_rxnfc = igb_set_rxnfc,
-#endif
-};
-
-#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
-static const struct ethtool_ops_ext igb_ethtool_ops_ext = {
- .size = sizeof(struct ethtool_ops_ext),
- .get_ts_info = igb_get_ts_info,
- .set_phys_id = igb_set_phys_id,
- .get_eee = igb_get_eee,
- .set_eee = igb_set_eee,
-};
-
-void igb_set_ethtool_ops(struct net_device *netdev)
-{
- SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
- set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext);
-}
-#else
-void igb_set_ethtool_ops(struct net_device *netdev)
-{
- /* have to "undeclare" const on this struct to remove warnings */
- SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops);
-}
-#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
-#endif /* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
deleted file mode 100644
index af378d2f..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ /dev/null
@@ -1,10344 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
-#include <linux/netdevice.h>
-#include <linux/tcp.h>
-#ifdef NETIF_F_TSO
-#include <net/checksum.h>
-#ifdef NETIF_F_TSO6
-#include <linux/ipv6.h>
-#include <net/ip6_checksum.h>
-#endif
-#endif
-#ifdef SIOCGMIIPHY
-#include <linux/mii.h>
-#endif
-#ifdef SIOCETHTOOL
-#include <linux/ethtool.h>
-#endif
-#include <linux/if_vlan.h>
-#ifdef CONFIG_PM_RUNTIME
-#include <linux/pm_runtime.h>
-#endif /* CONFIG_PM_RUNTIME */
-
-#include <linux/if_bridge.h>
-#include "igb.h"
-#include "igb_vmdq.h"
-
-#include <linux/uio_driver.h>
-
-#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR)
-#define DRV_DEBUG "_debug"
-#else
-#define DRV_DEBUG
-#endif
-#define DRV_HW_PERF
-#define VERSION_SUFFIX
-
-#define MAJ 5
-#define MIN 0
-#define BUILD 6
-#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
-
-char igb_driver_name[] = "igb";
-char igb_driver_version[] = DRV_VERSION;
-static const char igb_driver_string[] =
- "Intel(R) Gigabit Ethernet Network Driver";
-static const char igb_copyright[] =
- "Copyright (c) 2007-2013 Intel Corporation.";
-
-const struct pci_device_id igb_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
- /* required last entry */
- {0, }
-};
-
-//MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
-static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
-void igb_reset(struct igb_adapter *);
-static int igb_setup_all_tx_resources(struct igb_adapter *);
-static int igb_setup_all_rx_resources(struct igb_adapter *);
-static void igb_free_all_tx_resources(struct igb_adapter *);
-static void igb_free_all_rx_resources(struct igb_adapter *);
-static void igb_setup_mrqc(struct igb_adapter *);
-void igb_update_stats(struct igb_adapter *);
-static int igb_probe(struct pci_dev *, const struct pci_device_id *);
-static void __devexit igb_remove(struct pci_dev *pdev);
-static int igb_sw_init(struct igb_adapter *);
-static int igb_open(struct net_device *);
-static int igb_close(struct net_device *);
-static void igb_configure(struct igb_adapter *);
-static void igb_configure_tx(struct igb_adapter *);
-static void igb_configure_rx(struct igb_adapter *);
-static void igb_clean_all_tx_rings(struct igb_adapter *);
-static void igb_clean_all_rx_rings(struct igb_adapter *);
-static void igb_clean_tx_ring(struct igb_ring *);
-static void igb_set_rx_mode(struct net_device *);
-#ifdef HAVE_TIMER_SETUP
-static void igb_update_phy_info(struct timer_list *);
-static void igb_watchdog(struct timer_list *);
-#else
-static void igb_update_phy_info(unsigned long);
-static void igb_watchdog(unsigned long);
-#endif
-static void igb_watchdog_task(struct work_struct *);
-static void igb_dma_err_task(struct work_struct *);
-#ifdef HAVE_TIMER_SETUP
-static void igb_dma_err_timer(struct timer_list *);
-#else
-static void igb_dma_err_timer(unsigned long data);
-#endif
-static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
-static struct net_device_stats *igb_get_stats(struct net_device *);
-static int igb_change_mtu(struct net_device *, int);
-void igb_full_sync_mac_table(struct igb_adapter *adapter);
-static int igb_set_mac(struct net_device *, void *);
-static void igb_set_uta(struct igb_adapter *adapter);
-static irqreturn_t igb_intr(int irq, void *);
-static irqreturn_t igb_intr_msi(int irq, void *);
-static irqreturn_t igb_msix_other(int irq, void *);
-static irqreturn_t igb_msix_ring(int irq, void *);
-#ifdef IGB_DCA
-static void igb_update_dca(struct igb_q_vector *);
-static void igb_setup_dca(struct igb_adapter *);
-#endif /* IGB_DCA */
-static int igb_poll(struct napi_struct *, int);
-static bool igb_clean_tx_irq(struct igb_q_vector *);
-static bool igb_clean_rx_irq(struct igb_q_vector *, int);
-static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
-static void igb_tx_timeout(struct net_device *);
-static void igb_reset_task(struct work_struct *);
-#ifdef HAVE_VLAN_RX_REGISTER
-static void igb_vlan_mode(struct net_device *, struct vlan_group *);
-#endif
-#ifdef HAVE_VLAN_PROTOCOL
-static int igb_vlan_rx_add_vid(struct net_device *,
- __be16 proto, u16);
-static int igb_vlan_rx_kill_vid(struct net_device *,
- __be16 proto, u16);
-#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
-static int igb_vlan_rx_add_vid(struct net_device *,
- __always_unused __be16 proto, u16);
-static int igb_vlan_rx_kill_vid(struct net_device *,
- __always_unused __be16 proto, u16);
-#else
-static int igb_vlan_rx_add_vid(struct net_device *, u16);
-static int igb_vlan_rx_kill_vid(struct net_device *, u16);
-#endif
-#else
-static void igb_vlan_rx_add_vid(struct net_device *, u16);
-static void igb_vlan_rx_kill_vid(struct net_device *, u16);
-#endif
-static void igb_restore_vlan(struct igb_adapter *);
-void igb_rar_set(struct igb_adapter *adapter, u32 index);
-static void igb_ping_all_vfs(struct igb_adapter *);
-static void igb_msg_task(struct igb_adapter *);
-static void igb_vmm_control(struct igb_adapter *);
-static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
-static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
-static void igb_process_mdd_event(struct igb_adapter *);
-#ifdef IFLA_VF_MAX
-static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac);
-static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-#ifdef HAVE_VF_VLAN_PROTO
- int vf, u16 vlan, u8 qos, __be16 vlan_proto);
-#else
- int vf, u16 vlan, u8 qos);
-#endif
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
-static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
- bool setting);
-#endif
-#ifdef HAVE_VF_MIN_MAX_TXRATE
-static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
-#else /* HAVE_VF_MIN_MAX_TXRATE */
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
-#endif /* HAVE_VF_MIN_MAX_TXRATE */
-static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
- struct ifla_vf_info *ivi);
-static void igb_check_vf_rate_limit(struct igb_adapter *);
-#endif
-static int igb_vf_configure(struct igb_adapter *adapter, int vf);
-#ifdef CONFIG_PM
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
-static int igb_suspend(struct device *dev);
-static int igb_resume(struct device *dev);
-#ifdef CONFIG_PM_RUNTIME
-static int igb_runtime_suspend(struct device *dev);
-static int igb_runtime_resume(struct device *dev);
-static int igb_runtime_idle(struct device *dev);
-#endif /* CONFIG_PM_RUNTIME */
-static const struct dev_pm_ops igb_pm_ops = {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
- .suspend = igb_suspend,
- .resume = igb_resume,
- .freeze = igb_suspend,
- .thaw = igb_resume,
- .poweroff = igb_suspend,
- .restore = igb_resume,
-#ifdef CONFIG_PM_RUNTIME
- .runtime_suspend = igb_runtime_suspend,
- .runtime_resume = igb_runtime_resume,
- .runtime_idle = igb_runtime_idle,
-#endif
-#else /* Linux >= 2.6.34 */
- SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
-#ifdef CONFIG_PM_RUNTIME
- SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
- igb_runtime_idle)
-#endif /* CONFIG_PM_RUNTIME */
-#endif /* Linux version */
-};
-#else
-static int igb_suspend(struct pci_dev *pdev, pm_message_t state);
-static int igb_resume(struct pci_dev *pdev);
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
-#endif /* CONFIG_PM */
-#ifndef USE_REBOOT_NOTIFIER
-static void igb_shutdown(struct pci_dev *);
-#else
-static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
-static struct notifier_block igb_notifier_reboot = {
- .notifier_call = igb_notify_reboot,
- .next = NULL,
- .priority = 0
-};
-#endif
-#ifdef IGB_DCA
-static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
-static struct notifier_block dca_notifier = {
- .notifier_call = igb_notify_dca,
- .next = NULL,
- .priority = 0
-};
-#endif
-#ifdef CONFIG_NET_POLL_CONTROLLER
-/* for netdump / net console */
-static void igb_netpoll(struct net_device *);
-#endif
-
-#ifdef HAVE_PCI_ERS
-static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
- pci_channel_state_t);
-static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
-static void igb_io_resume(struct pci_dev *);
-
-static struct pci_error_handlers igb_err_handler = {
- .error_detected = igb_io_error_detected,
- .slot_reset = igb_io_slot_reset,
- .resume = igb_io_resume,
-};
-#endif
-
-static void igb_init_fw(struct igb_adapter *adapter);
-static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
-
-static struct pci_driver igb_driver = {
- .name = igb_driver_name,
- .id_table = igb_pci_tbl,
- .probe = igb_probe,
- .remove = __devexit_p(igb_remove),
-#ifdef CONFIG_PM
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
- .driver.pm = &igb_pm_ops,
-#else
- .suspend = igb_suspend,
- .resume = igb_resume,
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
-#endif /* CONFIG_PM */
-#ifndef USE_REBOOT_NOTIFIER
- .shutdown = igb_shutdown,
-#endif
-#ifdef HAVE_PCI_ERS
- .err_handler = &igb_err_handler
-#endif
-};
-
-//MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
-//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
-//MODULE_LICENSE("GPL");
-//MODULE_VERSION(DRV_VERSION);
-
-static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
- u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
- u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
- u32 vfta;
-
- /*
- * if this is the management vlan the only option is to add it in so
- * that the management pass through will continue to work
- */
- if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
- (vid == mng_cookie->vlan_id))
- add = TRUE;
-
- vfta = adapter->shadow_vfta[index];
-
- if (add)
- vfta |= mask;
- else
- vfta &= ~mask;
-
- e1000_write_vfta(hw, index, vfta);
- adapter->shadow_vfta[index] = vfta;
-}
-
-static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
-//module_param(debug, int, 0);
-//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");
-
-/**
- * igb_init_module - Driver Registration Routine
- *
- * igb_init_module is the first routine called when the driver is
- * loaded. All it does is register with the PCI subsystem.
- **/
-static int __init igb_init_module(void)
-{
- int ret;
-
- printk(KERN_INFO "%s - version %s\n",
- igb_driver_string, igb_driver_version);
-
- printk(KERN_INFO "%s\n", igb_copyright);
-#ifdef IGB_HWMON
-/* only use IGB_PROCFS if IGB_HWMON is not defined */
-#else
-#ifdef IGB_PROCFS
- if (igb_procfs_topdir_init())
- printk(KERN_INFO "Procfs failed to initialize topdir\n");
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
-
-#ifdef IGB_DCA
- dca_register_notify(&dca_notifier);
-#endif
- ret = pci_register_driver(&igb_driver);
-#ifdef USE_REBOOT_NOTIFIER
- if (ret >= 0) {
- register_reboot_notifier(&igb_notifier_reboot);
- }
-#endif
- return ret;
-}
-
-#undef module_init
-#define module_init(x) static int x(void) __attribute__((__unused__));
-module_init(igb_init_module);
-
-/**
- * igb_exit_module - Driver Exit Cleanup Routine
- *
- * igb_exit_module is called just before the driver is removed
- * from memory.
- **/
-static void __exit igb_exit_module(void)
-{
-#ifdef IGB_DCA
- dca_unregister_notify(&dca_notifier);
-#endif
-#ifdef USE_REBOOT_NOTIFIER
- unregister_reboot_notifier(&igb_notifier_reboot);
-#endif
- pci_unregister_driver(&igb_driver);
-
-#ifdef IGB_HWMON
-/* only compile IGB_PROCFS if IGB_HWMON is not defined */
-#else
-#ifdef IGB_PROCFS
- igb_procfs_topdir_exit();
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
-}
-
-#undef module_exit
-#define module_exit(x) static void x(void) __attribute__((__unused__));
-module_exit(igb_exit_module);
-
-#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
-/**
- * igb_cache_ring_register - Descriptor ring to register mapping
- * @adapter: board private structure to initialize
- *
- * Once we know the feature-set enabled for the device, we'll cache
- * the register offset the descriptor ring is assigned to.
- **/
-static void igb_cache_ring_register(struct igb_adapter *adapter)
-{
- int i = 0, j = 0;
- u32 rbase_offset = adapter->vfs_allocated_count;
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- /* The queues are allocated for virtualization such that VF 0
- * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
- * In order to avoid collision we start at the first free queue
- * and continue consuming queues in the same sequence
- */
- if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
- for (; i < adapter->rss_queues; i++)
- adapter->rx_ring[i]->reg_idx = rbase_offset +
- Q_IDX_82576(i);
- }
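-		/* fall through */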
- case e1000_82575:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- default:
- for (; i < adapter->num_rx_queues; i++)
- adapter->rx_ring[i]->reg_idx = rbase_offset + i;
- for (; j < adapter->num_tx_queues; j++)
- adapter->tx_ring[j]->reg_idx = rbase_offset + j;
- break;
- }
-}
-
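-/*
- * Worked example of the 82576 interleaving used above:
- * Q_IDX_82576(i) = ((i & 0x1) << 3) + (i >> 1), so i = 0, 1, 2, 3, 4, ...
- * yields 0, 8, 1, 9, 2, ... - the "VF 0 gets queues 0 and 8, VF 1 gets
- * queues 1 and 9" layout described in the comment, with the PF's RSS
- * queues continuing the same sequence from rbase_offset.
- */
-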
-static void igb_configure_lli(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u16 port;
-
- /* LLI should only be enabled for MSI-X or MSI interrupts */
- if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
- return;
-
- if (adapter->lli_port) {
- /* use filter 0 for port */
- port = htons((u16)adapter->lli_port);
- E1000_WRITE_REG(hw, E1000_IMIR(0),
- (port | E1000_IMIR_PORT_IM_EN));
- E1000_WRITE_REG(hw, E1000_IMIREXT(0),
- (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
- }
-
- if (adapter->flags & IGB_FLAG_LLI_PUSH) {
- /* use filter 1 for push flag */
- E1000_WRITE_REG(hw, E1000_IMIR(1),
- (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
- E1000_WRITE_REG(hw, E1000_IMIREXT(1),
- (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
- }
-
- if (adapter->lli_size) {
- /* use filter 2 for size */
- E1000_WRITE_REG(hw, E1000_IMIR(2),
- (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
- E1000_WRITE_REG(hw, E1000_IMIREXT(2),
- (adapter->lli_size | E1000_IMIREXT_CTRL_BP));
- }
-
-}
-
-/**
- * igb_write_ivar - configure ivar for given MSI-X vector
- * @hw: pointer to the HW structure
- * @msix_vector: vector number we are allocating to a given ring
- * @index: row index of IVAR register to write within IVAR table
- * @offset: column offset of in IVAR, should be multiple of 8
- *
- * This function is intended to handle the writing of the IVAR register
- * for adapters 82576 and newer. The IVAR table consists of 2 columns,
- * each containing a cause allocation for an Rx and Tx ring, and a
- * variable number of rows depending on the number of queues supported.
- **/
-static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
- int index, int offset)
-{
- u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
-
- /* clear any bits that are currently set */
- ivar &= ~((u32)0xFF << offset);
-
- /* write vector and valid bit */
- ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
-
- E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
-}
-
-#define IGB_N0_QUEUE -1
-static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- struct e1000_hw *hw = &adapter->hw;
- int rx_queue = IGB_N0_QUEUE;
- int tx_queue = IGB_N0_QUEUE;
- u32 msixbm = 0;
-
- if (q_vector->rx.ring)
- rx_queue = q_vector->rx.ring->reg_idx;
- if (q_vector->tx.ring)
- tx_queue = q_vector->tx.ring->reg_idx;
-
- switch (hw->mac.type) {
- case e1000_82575:
- /* The 82575 assigns vectors using a bitmask, which matches the
- bitmask for the EICR/EIMS/EIMC registers. To assign one
- or more queues to a vector, we write the appropriate bits
- into the MSIXBM register for that vector. */
- if (rx_queue > IGB_N0_QUEUE)
- msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
- if (tx_queue > IGB_N0_QUEUE)
- msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
- if (!adapter->msix_entries && msix_vector == 0)
- msixbm |= E1000_EIMS_OTHER;
- E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
- q_vector->eims_value = msixbm;
- break;
- case e1000_82576:
- /*
- * 82576 uses a table that essentially consists of 2 columns
- * with 8 rows. The ordering is column-major so we use the
- * lower 3 bits as the row index, and the 4th bit as the
- * column offset.
- */
- if (rx_queue > IGB_N0_QUEUE)
- igb_write_ivar(hw, msix_vector,
- rx_queue & 0x7,
- (rx_queue & 0x8) << 1);
- if (tx_queue > IGB_N0_QUEUE)
- igb_write_ivar(hw, msix_vector,
- tx_queue & 0x7,
- ((tx_queue & 0x8) << 1) + 8);
- q_vector->eims_value = 1 << msix_vector;
- break;
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- /*
- * On 82580 and newer adapters the scheme is similar to 82576
- * however instead of ordering column-major we have things
- * ordered row-major. So we traverse the table by using
- * bit 0 as the column offset, and the remaining bits as the
- * row index.
- */
- if (rx_queue > IGB_N0_QUEUE)
- igb_write_ivar(hw, msix_vector,
- rx_queue >> 1,
- (rx_queue & 0x1) << 4);
- if (tx_queue > IGB_N0_QUEUE)
- igb_write_ivar(hw, msix_vector,
- tx_queue >> 1,
- ((tx_queue & 0x1) << 4) + 8);
- q_vector->eims_value = 1 << msix_vector;
- break;
- default:
- BUG();
- break;
- }
-
- /* add q_vector eims value to global eims_enable_mask */
- adapter->eims_enable_mask |= q_vector->eims_value;
-
- /* configure q_vector to set itr on first interrupt */
- q_vector->set_itr = 1;
-}
-
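-/*
- * Worked example of the IVAR addressing computed above: on 82580 and
- * newer, rx queue 5 uses row index 5 >> 1 = 2 with column offset
- * (5 & 0x1) << 4 = 16, and tx queue 5 uses the same row with offset
- * 16 + 8 = 24; on the column-major 82576 layout, rx queue 9 instead uses
- * index 9 & 0x7 = 1 with offset (9 & 0x8) << 1 = 16.
- */
-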
-/**
- * igb_configure_msix - Configure MSI-X hardware
- *
- * igb_configure_msix sets up the hardware to properly
- * generate MSI-X interrupts.
- **/
-static void igb_configure_msix(struct igb_adapter *adapter)
-{
- u32 tmp;
- int i, vector = 0;
- struct e1000_hw *hw = &adapter->hw;
-
- adapter->eims_enable_mask = 0;
-
- /* set vector for other causes, i.e. link changes */
- switch (hw->mac.type) {
- case e1000_82575:
- tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
- /* enable MSI-X PBA support*/
- tmp |= E1000_CTRL_EXT_PBA_CLR;
-
- /* Auto-Mask interrupts upon ICR read. */
- tmp |= E1000_CTRL_EXT_EIAME;
- tmp |= E1000_CTRL_EXT_IRCA;
-
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
-
- /* enable msix_other interrupt */
- E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
- E1000_EIMS_OTHER);
- adapter->eims_other = E1000_EIMS_OTHER;
-
- break;
-
- case e1000_82576:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- /* Turn on MSI-X capability first, or our settings
- * won't stick. And it will take days to debug. */
- E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
- E1000_GPIE_PBA | E1000_GPIE_EIAME |
- E1000_GPIE_NSICR);
-
- /* enable msix_other interrupt */
- adapter->eims_other = 1 << vector;
- tmp = (vector++ | E1000_IVAR_VALID) << 8;
-
- E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
- break;
- default:
- /* do nothing, since nothing else supports MSI-X */
- break;
- } /* switch (hw->mac.type) */
-
- adapter->eims_enable_mask |= adapter->eims_other;
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- igb_assign_vector(adapter->q_vector[i], vector++);
-
- E1000_WRITE_FLUSH(hw);
-}
-
-/**
- * igb_request_msix - Initialize MSI-X interrupts
- *
- * igb_request_msix allocates MSI-X vectors and requests interrupts from the
- * kernel.
- **/
-static int igb_request_msix(struct igb_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- struct e1000_hw *hw = &adapter->hw;
- int i, err = 0, vector = 0, free_vector = 0;
-
- err = request_irq(adapter->msix_entries[vector].vector,
- &igb_msix_other, 0, netdev->name, adapter);
- if (err)
- goto err_out;
-
- for (i = 0; i < adapter->num_q_vectors; i++) {
- struct igb_q_vector *q_vector = adapter->q_vector[i];
-
- vector++;
-
- q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
-
- if (q_vector->rx.ring && q_vector->tx.ring)
- sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
- q_vector->rx.ring->queue_index);
- else if (q_vector->tx.ring)
- sprintf(q_vector->name, "%s-tx-%u", netdev->name,
- q_vector->tx.ring->queue_index);
- else if (q_vector->rx.ring)
- sprintf(q_vector->name, "%s-rx-%u", netdev->name,
- q_vector->rx.ring->queue_index);
- else
- sprintf(q_vector->name, "%s-unused", netdev->name);
-
- err = request_irq(adapter->msix_entries[vector].vector,
- igb_msix_ring, 0, q_vector->name,
- q_vector);
- if (err)
- goto err_free;
- }
-
- igb_configure_msix(adapter);
- return 0;
-
-err_free:
- /* free already assigned IRQs */
- free_irq(adapter->msix_entries[free_vector++].vector, adapter);
-
- vector--;
- for (i = 0; i < vector; i++) {
- free_irq(adapter->msix_entries[free_vector++].vector,
- adapter->q_vector[i]);
- }
-err_out:
- return err;
-}
-
-static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
-{
- if (adapter->msix_entries) {
- pci_disable_msix(adapter->pdev);
- kfree(adapter->msix_entries);
- adapter->msix_entries = NULL;
- } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
- pci_disable_msi(adapter->pdev);
- }
-}
-
-/**
- * igb_free_q_vector - Free memory allocated for specific interrupt vector
- * @adapter: board private structure to initialize
- * @v_idx: Index of vector to be freed
- *
- * This function frees the memory allocated to the q_vector. In addition if
- * NAPI is enabled it will delete any references to the NAPI struct prior
- * to freeing the q_vector.
- **/
-static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
-{
- struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
-
- if (q_vector->tx.ring)
- adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
-
- if (q_vector->rx.ring)
-		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
-
- adapter->q_vector[v_idx] = NULL;
- netif_napi_del(&q_vector->napi);
-#ifndef IGB_NO_LRO
- __skb_queue_purge(&q_vector->lrolist.active);
-#endif
- kfree(q_vector);
-}
-
-/**
- * igb_free_q_vectors - Free memory allocated for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * This function frees the memory allocated to the q_vectors. In addition if
- * NAPI is enabled it will delete any references to the NAPI struct prior
- * to freeing the q_vector.
- **/
-static void igb_free_q_vectors(struct igb_adapter *adapter)
-{
- int v_idx = adapter->num_q_vectors;
-
- adapter->num_tx_queues = 0;
- adapter->num_rx_queues = 0;
- adapter->num_q_vectors = 0;
-
- while (v_idx--)
- igb_free_q_vector(adapter, v_idx);
-}
-
-/**
- * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
- *
- * This function resets the device so that it has 0 rx queues, tx queues, and
- * MSI-X interrupts allocated.
- */
-static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
-{
- igb_free_q_vectors(adapter);
- igb_reset_interrupt_capability(adapter);
-}
-
-/**
- * igb_process_mdd_event
- * @adapter - board private structure
- *
- * Identify a malicious VF, disable the VF TX/RX queues and log a message.
- */
-static void igb_process_mdd_event(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 lvmmc, vfte, vfre, mdfb;
- u8 vf_queue;
-
- lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
- vf_queue = lvmmc >> 29;
-
-	/* VF index must be less than the number of VFs allocated */
- if (vf_queue >= adapter->vfs_allocated_count)
- return;
-
- netdev_info(adapter->netdev,
- "VF %d misbehaved. VF queues are disabled. "
- "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);
-
- /* Disable VFTE and VFRE related bits */
- vfte = E1000_READ_REG(hw, E1000_VFTE);
- vfte &= ~(1 << vf_queue);
- E1000_WRITE_REG(hw, E1000_VFTE, vfte);
-
- vfre = E1000_READ_REG(hw, E1000_VFRE);
- vfre &= ~(1 << vf_queue);
- E1000_WRITE_REG(hw, E1000_VFRE, vfre);
-
- /* Disable MDFB related bit. Clear on write */
- mdfb = E1000_READ_REG(hw, E1000_MDFB);
- mdfb |= (1 << vf_queue);
- E1000_WRITE_REG(hw, E1000_MDFB, mdfb);
-
- /* Reset the specific VF */
- E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
-}
-
-/**
- * igb_disable_mdd
- * @adapter - board private structure
- *
- * Disable MDD behavior in the HW
- **/
-static void igb_disable_mdd(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 reg;
-
-	if ((hw->mac.type != e1000_i350) &&
-	    (hw->mac.type != e1000_i354))
- return;
-
- reg = E1000_READ_REG(hw, E1000_DTXCTL);
- reg &= (~E1000_DTXCTL_MDP_EN);
- E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
-}
-
-/**
- * igb_enable_mdd
- * @adapter - board private structure
- *
- * Enable the HW to detect a malicious driver and send an interrupt to
- * the driver.
- **/
-static void igb_enable_mdd(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 reg;
-
- /* Only available on i350 device */
- if (hw->mac.type != e1000_i350)
- return;
-
- reg = E1000_READ_REG(hw, E1000_DTXCTL);
- reg |= E1000_DTXCTL_MDP_EN;
- E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
-}
-
-/**
- * igb_reset_sriov_capability - disable SR-IOV if enabled
- *
- * Attempt to disable single root IO virtualization capabilities present in the
- * kernel.
- **/
-static void igb_reset_sriov_capability(struct igb_adapter *adapter)
-{
- struct pci_dev *pdev = adapter->pdev;
- struct e1000_hw *hw = &adapter->hw;
-
- /* reclaim resources allocated to VFs */
- if (adapter->vf_data) {
- if (!pci_vfs_assigned(pdev)) {
- /*
- * disable iov and allow time for transactions to
- * clear
- */
- pci_disable_sriov(pdev);
- msleep(500);
-
- dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
- } else {
-			dev_info(pci_dev_to_dev(pdev), "IOV not disabled: "
-				 "VF(s) are assigned to guests!\n");
- }
- /* Disable Malicious Driver Detection */
- igb_disable_mdd(adapter);
-
- /* free vf data storage */
- kfree(adapter->vf_data);
- adapter->vf_data = NULL;
-
- /* switch rings back to PF ownership */
- E1000_WRITE_REG(hw, E1000_IOVCTL,
- E1000_IOVCTL_REUSE_VFQ);
- E1000_WRITE_FLUSH(hw);
- msleep(100);
- }
-
- adapter->vfs_allocated_count = 0;
-}
-
-/**
- * igb_set_sriov_capability - setup SR-IOV if supported
- *
- * Attempt to enable single root IO virtualization capabilities present in the
- * kernel.
- **/
-static void igb_set_sriov_capability(struct igb_adapter *adapter)
-{
- struct pci_dev *pdev = adapter->pdev;
- int old_vfs = 0;
- int i;
-
- old_vfs = pci_num_vf(pdev);
- if (old_vfs) {
- dev_info(pci_dev_to_dev(pdev),
-			 "%d pre-allocated VFs found - overriding "
- "max_vfs setting of %d\n", old_vfs,
- adapter->vfs_allocated_count);
- adapter->vfs_allocated_count = old_vfs;
- }
- /* no VFs requested, do nothing */
- if (!adapter->vfs_allocated_count)
- return;
-
- /* allocate vf data storage */
- adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
- sizeof(struct vf_data_storage),
- GFP_KERNEL);
-
- if (adapter->vf_data) {
- if (!old_vfs) {
- if (pci_enable_sriov(pdev,
- adapter->vfs_allocated_count))
- goto err_out;
- }
- for (i = 0; i < adapter->vfs_allocated_count; i++)
- igb_vf_configure(adapter, i);
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- case e1000_i350:
- /* Enable VM to VM loopback by default */
- adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
- break;
- default:
- /* Currently no other hardware supports loopback */
- break;
- }
-
- /* DMA Coalescing is not supported in IOV mode. */
- if (adapter->hw.mac.type >= e1000_i350)
- adapter->dmac = IGB_DMAC_DISABLE;
- if (adapter->hw.mac.type < e1000_i350)
- adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
- return;
-
- }
-
-err_out:
- kfree(adapter->vf_data);
- adapter->vf_data = NULL;
- adapter->vfs_allocated_count = 0;
- dev_warn(pci_dev_to_dev(pdev),
- "Failed to initialize SR-IOV virtualization\n");
-}
-
-/**
- * igb_set_interrupt_capability - set MSI or MSI-X if supported
- *
- * Attempt to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- **/
-static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
-{
- struct pci_dev *pdev = adapter->pdev;
- int err;
- int numvecs, i;
-
- if (!msix)
- adapter->int_mode = IGB_INT_MODE_MSI;
-
- /* Number of supported queues. */
- adapter->num_rx_queues = adapter->rss_queues;
-
- if (adapter->vmdq_pools > 1)
- adapter->num_rx_queues += adapter->vmdq_pools - 1;
-
-#ifdef HAVE_TX_MQ
- if (adapter->vmdq_pools)
- adapter->num_tx_queues = adapter->vmdq_pools;
- else
- adapter->num_tx_queues = adapter->num_rx_queues;
-#else
- adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
-#endif
-
- switch (adapter->int_mode) {
- case IGB_INT_MODE_MSIX:
- /* start with one vector for every rx queue */
- numvecs = adapter->num_rx_queues;
-
- /* if tx handler is separate add 1 for every tx queue */
- if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
- numvecs += adapter->num_tx_queues;
-
- /* store the number of vectors reserved for queues */
- adapter->num_q_vectors = numvecs;
-
- /* add 1 vector for link status interrupts */
- numvecs++;
- adapter->msix_entries = kcalloc(numvecs,
- sizeof(struct msix_entry),
- GFP_KERNEL);
- if (adapter->msix_entries) {
- for (i = 0; i < numvecs; i++)
- adapter->msix_entries[i].entry = i;
-
-#ifdef HAVE_PCI_ENABLE_MSIX
- err = pci_enable_msix(pdev,
- adapter->msix_entries, numvecs);
-#else
- err = pci_enable_msix_range(pdev,
- adapter->msix_entries,
- numvecs,
- numvecs);
-#endif
- if (err == 0)
- break;
- }
- /* MSI-X failed, so fall through and try MSI */
- dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
- "Falling back to MSI interrupts.\n");
- igb_reset_interrupt_capability(adapter);
- case IGB_INT_MODE_MSI:
- if (!pci_enable_msi(pdev))
- adapter->flags |= IGB_FLAG_HAS_MSI;
- else
- dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
- "interrupts. Falling back to legacy "
- "interrupts.\n");
- /* Fall through */
- case IGB_INT_MODE_LEGACY:
- /* disable advanced features and set number of queues to 1 */
- igb_reset_sriov_capability(adapter);
- adapter->vmdq_pools = 0;
- adapter->rss_queues = 1;
- adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
- adapter->num_rx_queues = 1;
- adapter->num_tx_queues = 1;
- adapter->num_q_vectors = 1;
- /* Don't do anything; this is system default */
- break;
- }
-}
-
-static void igb_add_ring(struct igb_ring *ring,
- struct igb_ring_container *head)
-{
- head->ring = ring;
- head->count++;
-}
-
-/**
- * igb_alloc_q_vector - Allocate memory for a single interrupt vector
- * @adapter: board private structure to initialize
- * @v_count: q_vectors allocated on adapter, used for ring interleaving
- * @v_idx: index of vector in adapter struct
- * @txr_count: total number of Tx rings to allocate
- * @txr_idx: index of first Tx ring to allocate
- * @rxr_count: total number of Rx rings to allocate
- * @rxr_idx: index of first Rx ring to allocate
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- **/
-static int igb_alloc_q_vector(struct igb_adapter *adapter,
- unsigned int v_count, unsigned int v_idx,
- unsigned int txr_count, unsigned int txr_idx,
- unsigned int rxr_count, unsigned int rxr_idx)
-{
- struct igb_q_vector *q_vector;
- struct igb_ring *ring;
- int ring_count, size;
-
- /* igb only supports 1 Tx and/or 1 Rx queue per vector */
- if (txr_count > 1 || rxr_count > 1)
- return -ENOMEM;
-
- ring_count = txr_count + rxr_count;
- size = sizeof(struct igb_q_vector) +
- (sizeof(struct igb_ring) * ring_count);
-
- /* allocate q_vector and rings */
- q_vector = kzalloc(size, GFP_KERNEL);
- if (!q_vector)
- return -ENOMEM;
-
-#ifndef IGB_NO_LRO
- /* initialize LRO */
- __skb_queue_head_init(&q_vector->lrolist.active);
-
-#endif
- /* initialize NAPI */
- netif_napi_add(adapter->netdev, &q_vector->napi,
- igb_poll, 64);
-
- /* tie q_vector and adapter together */
- adapter->q_vector[v_idx] = q_vector;
- q_vector->adapter = adapter;
-
- /* initialize work limits */
- q_vector->tx.work_limit = adapter->tx_work_limit;
-
- /* initialize ITR configuration */
- q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
- q_vector->itr_val = IGB_START_ITR;
-
- /* initialize pointer to rings */
- ring = q_vector->ring;
-
- /* initialize ITR: settings of 0 or above 3 are used directly as the
- * throttle value; 1-3 select dynamic moderation, so keep the
- * IGB_START_ITR default */
- if (rxr_count) {
- /* rx or rx/tx vector */
- if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
- q_vector->itr_val = adapter->rx_itr_setting;
- } else {
- /* tx only vector */
- if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
- q_vector->itr_val = adapter->tx_itr_setting;
- }
-
- if (txr_count) {
- /* assign generic ring traits */
- ring->dev = &adapter->pdev->dev;
- ring->netdev = adapter->netdev;
-
- /* configure backlink on ring */
- ring->q_vector = q_vector;
-
- /* update q_vector Tx values */
- igb_add_ring(ring, &q_vector->tx);
-
- /* For 82575, context index must be unique per ring. */
- if (adapter->hw.mac.type == e1000_82575)
- set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
-
- /* apply Tx specific ring traits */
- ring->count = adapter->tx_ring_count;
- ring->queue_index = txr_idx;
-
- /* assign ring to adapter */
- adapter->tx_ring[txr_idx] = ring;
-
- /* push pointer to next ring */
- ring++;
- }
-
- if (rxr_count) {
- /* assign generic ring traits */
- ring->dev = &adapter->pdev->dev;
- ring->netdev = adapter->netdev;
-
- /* configure backlink on ring */
- ring->q_vector = q_vector;
-
- /* update q_vector Rx values */
- igb_add_ring(ring, &q_vector->rx);
-
-#ifndef HAVE_NDO_SET_FEATURES
- /* enable rx checksum */
- set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
-
-#endif
- /* set flag indicating ring supports SCTP checksum offload */
- if (adapter->hw.mac.type >= e1000_82576)
- set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
-
- if ((adapter->hw.mac.type == e1000_i350) ||
- (adapter->hw.mac.type == e1000_i354))
- set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
-
- /* apply Rx specific ring traits */
- ring->count = adapter->rx_ring_count;
- ring->queue_index = rxr_idx;
-
- /* assign ring to adapter */
- adapter->rx_ring[rxr_idx] = ring;
- }
-
- return 0;
-}
-
-/**
- * igb_alloc_q_vectors - Allocate memory for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- **/
-static int igb_alloc_q_vectors(struct igb_adapter *adapter)
-{
- int q_vectors = adapter->num_q_vectors;
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int rxr_idx = 0, txr_idx = 0, v_idx = 0;
- int err;
-
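- /* If there are enough vectors to give every ring its own, hand each
- * Rx ring a dedicated vector here; the loop below then spreads the
- * remaining Tx (and any leftover Rx) rings across the rest.
- */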
- if (q_vectors >= (rxr_remaining + txr_remaining)) {
- for (; rxr_remaining; v_idx++) {
- err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
- 0, 0, 1, rxr_idx);
-
- if (err)
- goto err_out;
-
- /* update counts and index */
- rxr_remaining--;
- rxr_idx++;
- }
- }
-
- for (; v_idx < q_vectors; v_idx++) {
- int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
- int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
- err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
- tqpv, txr_idx, rqpv, rxr_idx);
-
- if (err)
- goto err_out;
-
- /* update counts and index */
- rxr_remaining -= rqpv;
- txr_remaining -= tqpv;
- rxr_idx++;
- txr_idx++;
- }
-
- return 0;
-
-err_out:
- adapter->num_tx_queues = 0;
- adapter->num_rx_queues = 0;
- adapter->num_q_vectors = 0;
-
- while (v_idx--)
- igb_free_q_vector(adapter, v_idx);
-
- return -ENOMEM;
-}
-
-/**
- * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
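- * @adapter: board private structure to initialize
- * @msix: boolean indicating whether MSI-X interrupts should be attempted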
- *
- * This function initializes the interrupts and allocates all of the queues.
- **/
-static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
-{
- struct pci_dev *pdev = adapter->pdev;
- int err;
-
- igb_set_interrupt_capability(adapter, msix);
-
- err = igb_alloc_q_vectors(adapter);
- if (err) {
- dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
- goto err_alloc_q_vectors;
- }
-
- igb_cache_ring_register(adapter);
-
- return 0;
-
-err_alloc_q_vectors:
- igb_reset_interrupt_capability(adapter);
- return err;
-}
-
-/**
- * igb_request_irq - initialize interrupts
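- * @adapter: board private structure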
- *
- * Attempts to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- **/
-static int igb_request_irq(struct igb_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- struct pci_dev *pdev = adapter->pdev;
- int err = 0;
-
- if (adapter->msix_entries) {
- err = igb_request_msix(adapter);
- if (!err)
- goto request_done;
- /* fall back to MSI */
- igb_free_all_tx_resources(adapter);
- igb_free_all_rx_resources(adapter);
-
- igb_clear_interrupt_scheme(adapter);
- igb_reset_sriov_capability(adapter);
- err = igb_init_interrupt_scheme(adapter, false);
- if (err)
- goto request_done;
- igb_setup_all_tx_resources(adapter);
- igb_setup_all_rx_resources(adapter);
- igb_configure(adapter);
- }
-
- igb_assign_vector(adapter->q_vector[0], 0);
-
- if (adapter->flags & IGB_FLAG_HAS_MSI) {
- err = request_irq(pdev->irq, &igb_intr_msi, 0,
- netdev->name, adapter);
- if (!err)
- goto request_done;
-
- /* fall back to legacy interrupts */
- igb_reset_interrupt_capability(adapter);
- adapter->flags &= ~IGB_FLAG_HAS_MSI;
- }
-
- err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
- netdev->name, adapter);
-
- if (err)
- dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
- err);
-
-request_done:
- return err;
-}
-
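-/**
- * igb_free_irq - free the IRQs requested by igb_request_irq
- * @adapter: board private structure
- **/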
-static void igb_free_irq(struct igb_adapter *adapter)
-{
- if (adapter->msix_entries) {
- int vector = 0, i;
-
- free_irq(adapter->msix_entries[vector++].vector, adapter);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- free_irq(adapter->msix_entries[vector++].vector,
- adapter->q_vector[i]);
- } else {
- free_irq(adapter->pdev->irq, adapter);
- }
-}
-
-/**
- * igb_irq_disable - Mask off interrupt generation on the NIC
- * @adapter: board private structure
- **/
-static void igb_irq_disable(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- /*
- * We need to be careful when disabling interrupts: the VFs are also
- * mapped into these registers, so clearing bits they rely on can cause
- * issues in the VF drivers. Only clear the bits we set ourselves.
- */
- if (adapter->msix_entries) {
- u32 regval = E1000_READ_REG(hw, E1000_EIAM);
- E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
- E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
- regval = E1000_READ_REG(hw, E1000_EIAC);
- E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
- }
-
- E1000_WRITE_REG(hw, E1000_IAM, 0);
- E1000_WRITE_REG(hw, E1000_IMC, ~0);
- E1000_WRITE_FLUSH(hw);
-
- if (adapter->msix_entries) {
- int vector = 0, i;
-
- synchronize_irq(adapter->msix_entries[vector++].vector);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- synchronize_irq(adapter->msix_entries[vector++].vector);
- } else {
- synchronize_irq(adapter->pdev->irq);
- }
-}
-
-/**
- * igb_irq_enable - Enable default interrupt generation settings
- * @adapter: board private structure
- **/
-static void igb_irq_enable(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- if (adapter->msix_entries) {
- u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
- u32 regval = E1000_READ_REG(hw, E1000_EIAC);
- E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
- regval = E1000_READ_REG(hw, E1000_EIAM);
- E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
- E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
- if (adapter->vfs_allocated_count) {
- E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
- ims |= E1000_IMS_VMMB;
- if (adapter->mdd)
- if ((adapter->hw.mac.type == e1000_i350) ||
- (adapter->hw.mac.type == e1000_i354))
- ims |= E1000_IMS_MDDET;
- }
- E1000_WRITE_REG(hw, E1000_IMS, ims);
- } else {
- E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
- E1000_IMS_DRSTA);
- E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
- E1000_IMS_DRSTA);
- }
-}
-
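-/**
- * igb_update_mng_vlan - update the VFTA entry for the manageability VLAN
- * @adapter: board private structure
- **/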
-static void igb_update_mng_vlan(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u16 vid = adapter->hw.mng_cookie.vlan_id;
- u16 old_vid = adapter->mng_vlan_id;
-
- if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
- /* add VID to filter table */
- igb_vfta_set(adapter, vid, TRUE);
- adapter->mng_vlan_id = vid;
- } else {
- adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
- }
-
- if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
- (vid != old_vid) &&
-#ifdef HAVE_VLAN_RX_REGISTER
- !vlan_group_get_device(adapter->vlgrp, old_vid)) {
-#else
- !test_bit(old_vid, adapter->active_vlans)) {
-#endif
- /* remove VID from filter table */
- igb_vfta_set(adapter, old_vid, FALSE);
- }
-}
-
-/**
- * igb_release_hw_control - release control of the h/w to f/w
- * @adapter: address of board private structure
- *
- * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
- * For ASF and Pass Through versions of f/w this means that the
- * driver is no longer loaded.
- *
- **/
-static void igb_release_hw_control(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl_ext;
-
- /* Let firmware take over control of h/w */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- E1000_WRITE_REG(hw, E1000_CTRL_EXT,
- ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
-}
-
-/**
- * igb_get_hw_control - get control of the h/w from f/w
- * @adapter: address of board private structure
- *
- * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
- * For ASF and Pass Through versions of f/w this means that
- * the driver is loaded.
- *
- **/
-static void igb_get_hw_control(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl_ext;
-
- /* Let firmware know the driver has taken over */
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- E1000_WRITE_REG(hw, E1000_CTRL_EXT,
- ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
-}
-
-/**
- * igb_configure - configure the hardware for RX and TX
- * @adapter: private board structure
- **/
-static void igb_configure(struct igb_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- int i;
-
- igb_get_hw_control(adapter);
- igb_set_rx_mode(netdev);
-
- igb_restore_vlan(adapter);
-
- igb_setup_tctl(adapter);
- igb_setup_mrqc(adapter);
- igb_setup_rctl(adapter);
-
- igb_configure_tx(adapter);
- igb_configure_rx(adapter);
-
- e1000_rx_fifo_flush_82575(&adapter->hw);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
- if (adapter->num_tx_queues > 1)
- netdev->features |= NETIF_F_MULTI_QUEUE;
- else
- netdev->features &= ~NETIF_F_MULTI_QUEUE;
-#endif
-
- /* call igb_desc_unused which always leaves
- * at least 1 descriptor unused to make sure
- * next_to_use != next_to_clean */
- for (i = 0; i < adapter->num_rx_queues; i++) {
- struct igb_ring *ring = adapter->rx_ring[i];
- igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
- }
-}
-
-/**
- * igb_power_up_link - Power up the phy/serdes link
- * @adapter: address of board private structure
- **/
-void igb_power_up_link(struct igb_adapter *adapter)
-{
- e1000_phy_hw_reset(&adapter->hw);
-
- if (adapter->hw.phy.media_type == e1000_media_type_copper)
- e1000_power_up_phy(&adapter->hw);
- else
- e1000_power_up_fiber_serdes_link(&adapter->hw);
-}
-
-/**
- * igb_power_down_link - Power down the phy/serdes link
- * @adapter: address of board private structure
- */
-static void igb_power_down_link(struct igb_adapter *adapter)
-{
- if (adapter->hw.phy.media_type == e1000_media_type_copper)
- e1000_power_down_phy(&adapter->hw);
- else
- e1000_shutdown_fiber_serdes_link(&adapter->hw);
-}
-
-/* Detect and switch function for Media Auto Sense */
-static void igb_check_swap_media(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl_ext, connsw;
- bool swap_now = false;
- bool link;
-
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- connsw = E1000_READ_REG(hw, E1000_CONNSW);
- link = igb_has_link(adapter);
- (void) link;
-
- /* We need to perform a live swap if the current media is copper and
- * there is a fiber/serdes link to switch to.
- */
-
- if ((hw->phy.media_type == e1000_media_type_copper) &&
- (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
- swap_now = true;
- } else if (!(connsw & E1000_CONNSW_SERDESD)) {
- /* copper signal takes time to appear */
- if (adapter->copper_tries < 2) {
- adapter->copper_tries++;
- connsw |= E1000_CONNSW_AUTOSENSE_CONF;
- E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
- return;
- } else {
- adapter->copper_tries = 0;
- if ((connsw & E1000_CONNSW_PHYSD) &&
- (!(connsw & E1000_CONNSW_PHY_PDN))) {
- swap_now = true;
- connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
- E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
- }
- }
- }
-
- if (swap_now) {
- switch (hw->phy.media_type) {
- case e1000_media_type_copper:
- dev_info(pci_dev_to_dev(adapter->pdev),
- "%s:MAS: changing media to fiber/serdes\n",
- adapter->netdev->name);
- ctrl_ext |=
- E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
- adapter->flags |= IGB_FLAG_MEDIA_RESET;
- adapter->copper_tries = 0;
- break;
- case e1000_media_type_internal_serdes:
- case e1000_media_type_fiber:
- dev_info(pci_dev_to_dev(adapter->pdev),
- "%s:MAS: changing media to copper\n",
- adapter->netdev->name);
- ctrl_ext &=
- ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
- adapter->flags |= IGB_FLAG_MEDIA_RESET;
- break;
- default:
- /* shouldn't get here during regular operation */
- dev_err(pci_dev_to_dev(adapter->pdev),
- "%s:AMS: Invalid media type found, returning\n",
- adapter->netdev->name);
- break;
- }
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
- }
-}
-
-#ifdef HAVE_I2C_SUPPORT
-/* igb_get_i2c_data - Reads the I2C SDA data bit
- * @data: pointer to adapter structure
- *
- * Returns the I2C data bit value
- */
-static int igb_get_i2c_data(void *data)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
- s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- return (i2cctl & E1000_I2C_DATA_IN) != 0;
-}
-
-/* igb_set_i2c_data - Sets the I2C data bit
- * @data: pointer to hardware structure
- * @state: I2C data value (0 or 1) to set
- *
- * Sets the I2C data bit
- */
-static void igb_set_i2c_data(void *data, int state)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
- s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- if (state)
- i2cctl |= E1000_I2C_DATA_OUT;
- else
- i2cctl &= ~E1000_I2C_DATA_OUT;
-
- i2cctl &= ~E1000_I2C_DATA_OE_N;
- i2cctl |= E1000_I2C_CLK_OE_N;
-
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
- E1000_WRITE_FLUSH(hw);
-
-}
-
-/* igb_set_i2c_clk - Sets the I2C SCL clock
- * @data: pointer to hardware structure
- * @state: state to set clock
- *
- * Sets the I2C clock line to state
- */
-static void igb_set_i2c_clk(void *data, int state)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
- s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- if (state) {
- i2cctl |= E1000_I2C_CLK_OUT;
- i2cctl &= ~E1000_I2C_CLK_OE_N;
- } else {
- i2cctl &= ~E1000_I2C_CLK_OUT;
- i2cctl &= ~E1000_I2C_CLK_OE_N;
- }
- E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
- E1000_WRITE_FLUSH(hw);
-}
-
-/* igb_get_i2c_clk - Gets the I2C SCL clock state
- * @data: pointer to hardware structure
- *
- * Gets the I2C clock state
- */
-static int igb_get_i2c_clk(void *data)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
- s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
-
- return (i2cctl & E1000_I2C_CLK_IN) != 0;
-}
-
-static const struct i2c_algo_bit_data igb_i2c_algo = {
- .setsda = igb_set_i2c_data,
- .setscl = igb_set_i2c_clk,
- .getsda = igb_get_i2c_data,
- .getscl = igb_get_i2c_clk,
- .udelay = 5,
- .timeout = 20,
-};
-
-/* igb_init_i2c - Init I2C interface
- * @adapter: pointer to adapter structure
- *
- */
-static s32 igb_init_i2c(struct igb_adapter *adapter)
-{
- s32 status = E1000_SUCCESS;
-
- /* I2C interface supported on i350 devices */
- if (adapter->hw.mac.type != e1000_i350)
- return E1000_SUCCESS;
-
- /* Initialize the i2c bus which is controlled by the registers.
- * This bus will use the i2c_algo_bit structure that implements
- * the protocol through toggling of the 4 bits in the register.
- */
- adapter->i2c_adap.owner = THIS_MODULE;
- adapter->i2c_algo = igb_i2c_algo;
- adapter->i2c_algo.data = adapter;
- adapter->i2c_adap.algo_data = &adapter->i2c_algo;
- adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
- strlcpy(adapter->i2c_adap.name, "igb BB",
- sizeof(adapter->i2c_adap.name));
- status = i2c_bit_add_bus(&adapter->i2c_adap);
- return status;
-}
-
-#endif /* HAVE_I2C_SUPPORT */
-/**
- * igb_up - Open the interface and prepare it to handle traffic
- * @adapter: board private structure
- **/
-int igb_up(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
-
- /* hardware has been reset, we need to reload some things */
- igb_configure(adapter);
-
- clear_bit(__IGB_DOWN, &adapter->state);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&(adapter->q_vector[i]->napi));
-
- if (adapter->msix_entries)
- igb_configure_msix(adapter);
- else
- igb_assign_vector(adapter->q_vector[0], 0);
-
- igb_configure_lli(adapter);
-
- /* Clear any pending interrupts. */
- E1000_READ_REG(hw, E1000_ICR);
- igb_irq_enable(adapter);
-
- /* notify VFs that reset has been completed */
- if (adapter->vfs_allocated_count) {
- u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
- reg_data |= E1000_CTRL_EXT_PFRSTD;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
- }
-
- netif_tx_start_all_queues(adapter->netdev);
-
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- schedule_work(&adapter->dma_err_task);
- /* start the watchdog. */
- hw->mac.get_link_status = 1;
- schedule_work(&adapter->watchdog_task);
-
- if ((adapter->flags & IGB_FLAG_EEE) &&
- (!hw->dev_spec._82575.eee_disable))
- adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
-
- return 0;
-}
-
-void igb_down(struct igb_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- struct e1000_hw *hw = &adapter->hw;
- u32 tctl, rctl;
- int i;
-
- /* signal that we're down so the interrupt handler does not
- * reschedule our watchdog timer */
- set_bit(__IGB_DOWN, &adapter->state);
-
- /* disable receives in the hardware */
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
- /* flush and sleep below */
-
- netif_tx_stop_all_queues(netdev);
-
- /* disable transmits in the hardware */
- tctl = E1000_READ_REG(hw, E1000_TCTL);
- tctl &= ~E1000_TCTL_EN;
- E1000_WRITE_REG(hw, E1000_TCTL, tctl);
- /* flush both disables and wait for them to finish */
- E1000_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_disable(&(adapter->q_vector[i]->napi));
-
- igb_irq_disable(adapter);
-
- adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
-
- del_timer_sync(&adapter->watchdog_timer);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- del_timer_sync(&adapter->dma_err_timer);
- del_timer_sync(&adapter->phy_info_timer);
-
- netif_carrier_off(netdev);
-
- /* record the stats before reset */
- igb_update_stats(adapter);
-
- adapter->link_speed = 0;
- adapter->link_duplex = 0;
-
-#ifdef HAVE_PCI_ERS
- if (!pci_channel_offline(adapter->pdev))
- igb_reset(adapter);
-#else
- igb_reset(adapter);
-#endif
- igb_clean_all_tx_rings(adapter);
- igb_clean_all_rx_rings(adapter);
-#ifdef IGB_DCA
- /* since we reset the hardware DCA settings were cleared */
- igb_setup_dca(adapter);
-#endif
-}
-
-void igb_reinit_locked(struct igb_adapter *adapter)
-{
- WARN_ON(in_interrupt());
- while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
- igb_down(adapter);
- igb_up(adapter);
- clear_bit(__IGB_RESETTING, &adapter->state);
-}
-
-/**
- * igb_enable_mas - Media Autosense re-enable after swap
- *
- * @adapter: adapter struct
- **/
-static s32 igb_enable_mas(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 connsw;
- s32 ret_val = E1000_SUCCESS;
-
- connsw = E1000_READ_REG(hw, E1000_CONNSW);
- if (hw->phy.media_type == e1000_media_type_copper) {
- /* configure for SerDes media detect */
- if (!(connsw & E1000_CONNSW_SERDESD)) {
- connsw |= E1000_CONNSW_ENRGSRC;
- connsw |= E1000_CONNSW_AUTOSENSE_EN;
- E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
- E1000_WRITE_FLUSH(hw);
- } else if (connsw & E1000_CONNSW_SERDESD) {
- /* already SerDes, no need to enable anything */
- return ret_val;
- } else {
- dev_info(pci_dev_to_dev(adapter->pdev),
- "%s:MAS: Unable to configure feature, disabling..\n",
- adapter->netdev->name);
- adapter->flags &= ~IGB_FLAG_MAS_ENABLE;
- }
- }
- return ret_val;
-}
-
-void igb_reset(struct igb_adapter *adapter)
-{
- struct pci_dev *pdev = adapter->pdev;
- struct e1000_hw *hw = &adapter->hw;
- struct e1000_mac_info *mac = &hw->mac;
- struct e1000_fc_info *fc = &hw->fc;
- u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
-
- /* Repartition Pba for greater than 9k mtu
- * To take effect CTRL.RST is required.
- */
- switch (mac->type) {
- case e1000_i350:
- case e1000_82580:
- case e1000_i354:
- pba = E1000_READ_REG(hw, E1000_RXPBS);
- pba = e1000_rxpbs_adjust_82580(pba);
- break;
- case e1000_82576:
- pba = E1000_READ_REG(hw, E1000_RXPBS);
- pba &= E1000_RXPBS_SIZE_MASK_82576;
- break;
- case e1000_82575:
- case e1000_i210:
- case e1000_i211:
- default:
- pba = E1000_PBA_34K;
- break;
- }
-
- if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
- (mac->type < e1000_82576)) {
- /* adjust PBA for jumbo frames */
- E1000_WRITE_REG(hw, E1000_PBA, pba);
-
- /* To maintain wire speed transmits, the Tx FIFO should be
- * large enough to accommodate two full transmit packets,
- * rounded up to the next 1KB and expressed in KB. Likewise,
- * the Rx FIFO should be large enough to accommodate at least
- * one full receive packet and is similarly rounded up and
- * expressed in KB. */
- pba = E1000_READ_REG(hw, E1000_PBA);
- /* upper 16 bits has Tx packet buffer allocation size in KB */
- tx_space = pba >> 16;
- /* lower 16 bits has Rx packet buffer allocation size in KB */
- pba &= 0xffff;
- /* the Tx FIFO also stores 16 bytes of information about the Tx
- * packet, but doesn't include the Ethernet FCS because hardware
- * appends it */
- min_tx_space = (adapter->max_frame_size +
- sizeof(union e1000_adv_tx_desc) -
- ETH_FCS_LEN) * 2;
- min_tx_space = ALIGN(min_tx_space, 1024);
- min_tx_space >>= 10;
- /* software strips receive CRC, so leave room for it */
- min_rx_space = adapter->max_frame_size;
- min_rx_space = ALIGN(min_rx_space, 1024);
- min_rx_space >>= 10;
-
- /* If current Tx allocation is less than the min Tx FIFO size,
- * and the min Tx FIFO size is less than the current Rx FIFO
- * allocation, take space away from current Rx allocation */
- if (tx_space < min_tx_space &&
- ((min_tx_space - tx_space) < pba)) {
- pba = pba - (min_tx_space - tx_space);
-
- /* if short on rx space, rx wins and must trump tx
- * adjustment */
- if (pba < min_rx_space)
- pba = min_rx_space;
- }
- E1000_WRITE_REG(hw, E1000_PBA, pba);
- }
-
- /* flow control settings */
- /* The high water mark must be low enough to fit one full frame
- * (or the size used for early receive) above it in the Rx FIFO.
- * Set it to the lower of:
- * - 90% of the Rx FIFO size, or
- * - the full Rx FIFO size minus one full frame */
- hwm = min(((pba << 10) * 9 / 10),
- ((pba << 10) - 2 * adapter->max_frame_size));
-
- fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */
- fc->low_water = fc->high_water - 16;
- fc->pause_time = 0xFFFF;
- fc->send_xon = 1;
- fc->current_mode = fc->requested_mode;
-
- /* disable receive for all VFs and wait one second */
- if (adapter->vfs_allocated_count) {
- int i;
- /*
- * Clear all flags except indication that the PF has set
- * the VF MAC addresses administratively
- */
- for (i = 0 ; i < adapter->vfs_allocated_count; i++)
- adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
-
- /* ping all the active vfs to let them know we are going down */
- igb_ping_all_vfs(adapter);
-
- /* disable transmits and receives */
- E1000_WRITE_REG(hw, E1000_VFRE, 0);
- E1000_WRITE_REG(hw, E1000_VFTE, 0);
- }
-
- /* Allow time for pending master requests to run */
- e1000_reset_hw(hw);
- E1000_WRITE_REG(hw, E1000_WUC, 0);
-
- if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
- e1000_setup_init_funcs(hw, TRUE);
- igb_check_options(adapter);
- e1000_get_bus_info(hw);
- adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
- }
- if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
- if (igb_enable_mas(adapter))
- dev_err(pci_dev_to_dev(pdev),
- "Error enabling Media Auto Sense\n");
- }
- if (e1000_init_hw(hw))
- dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");
-
- /*
- * Flow control settings reset on hardware reset, so guarantee flow
- * control is off when forcing speed.
- */
- if (!hw->mac.autoneg)
- e1000_force_mac_fc(hw);
-
- igb_init_dmac(adapter, pba);
- /* Re-initialize the thermal sensor on i350 devices. */
- if (mac->type == e1000_i350 && hw->bus.func == 0) {
- /*
- * If present, re-initialize the external thermal sensor
- * interface.
- */
- if (adapter->ets)
- e1000_set_i2c_bb(hw);
- e1000_init_thermal_sensor_thresh(hw);
- }
-
- /* Re-establish EEE setting */
- if (hw->phy.media_type == e1000_media_type_copper) {
- switch (mac->type) {
- case e1000_i350:
- case e1000_i210:
- case e1000_i211:
- e1000_set_eee_i350(hw);
- break;
- case e1000_i354:
- e1000_set_eee_i354(hw);
- break;
- default:
- break;
- }
- }
-
- if (!netif_running(adapter->netdev))
- igb_power_down_link(adapter);
-
- igb_update_mng_vlan(adapter);
-
- /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
- E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
-
-
-#ifdef HAVE_PTP_1588_CLOCK
- /* Re-enable PTP, where applicable. */
- igb_ptp_reset(adapter);
-#endif /* HAVE_PTP_1588_CLOCK */
-
- e1000_get_phy_info(hw);
-
- adapter->devrc++;
-}
-
-#ifdef HAVE_NDO_SET_FEATURES
-static kni_netdev_features_t igb_fix_features(struct net_device *netdev,
- kni_netdev_features_t features)
-{
- /*
- * Since there is no support for enabling Tx VLAN acceleration on
- * its own, make sure the Tx flag is cleared whenever the Rx flag is.
- */
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- if (!(features & NETIF_F_HW_VLAN_CTAG_RX))
- features &= ~NETIF_F_HW_VLAN_CTAG_TX;
-#else
- if (!(features & NETIF_F_HW_VLAN_RX))
- features &= ~NETIF_F_HW_VLAN_TX;
-#endif
-
- /* If Rx checksum is disabled, then LRO should also be disabled */
- if (!(features & NETIF_F_RXCSUM))
- features &= ~NETIF_F_LRO;
-
- return features;
-}
-
-static int igb_set_features(struct net_device *netdev,
- kni_netdev_features_t features)
-{
- u32 changed = netdev->features ^ features;
-
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- if (changed & NETIF_F_HW_VLAN_CTAG_RX)
-#else
- if (changed & NETIF_F_HW_VLAN_RX)
-#endif
- igb_vlan_mode(netdev, features);
-
- return 0;
-}
-
-#ifdef NTF_SELF
-#ifdef USE_CONST_DEV_UC_CHAR
-static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr,
-#ifdef HAVE_NDO_FDB_ADD_VID
- u16 vid,
-#endif
- u16 flags)
-#else
-static int igb_ndo_fdb_add(struct ndmsg *ndm,
- struct net_device *dev,
- unsigned char *addr,
- u16 flags)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- struct e1000_hw *hw = &adapter->hw;
- int err;
-
- if (!(adapter->vfs_allocated_count))
- return -EOPNOTSUPP;
-
- /* Hardware does not support aging addresses, so if an
- * ndm_state is given, only allow permanent addresses.
- */
- if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
- pr_info("%s: FDB only supports static addresses\n",
- igb_driver_name);
- return -EINVAL;
- }
-
- if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
- u32 rar_uc_entries = hw->mac.rar_entry_count -
- (adapter->vfs_allocated_count + 1);
-
- if (netdev_uc_count(dev) < rar_uc_entries)
- err = dev_uc_add_excl(dev, addr);
- else
- err = -ENOMEM;
- } else if (is_multicast_ether_addr(addr)) {
- err = dev_mc_add_excl(dev, addr);
- } else {
- err = -EINVAL;
- }
-
- /* Only return duplicate errors if NLM_F_EXCL is set */
- if (err == -EEXIST && !(flags & NLM_F_EXCL))
- err = 0;
-
- return err;
-}
-
-#ifndef USE_DEFAULT_FDB_DEL_DUMP
-#ifdef USE_CONST_DEV_UC_CHAR
-static int igb_ndo_fdb_del(struct ndmsg *ndm,
- struct net_device *dev,
- const unsigned char *addr)
-#else
-static int igb_ndo_fdb_del(struct ndmsg *ndm,
- struct net_device *dev,
- unsigned char *addr)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- int err = -EOPNOTSUPP;
-
- if (ndm->ndm_state & NUD_PERMANENT) {
- pr_info("%s: FDB only supports static addresses\n",
- igb_driver_name);
- return -EINVAL;
- }
-
- if (adapter->vfs_allocated_count) {
- if (is_unicast_ether_addr(addr))
- err = dev_uc_del(dev, addr);
- else if (is_multicast_ether_addr(addr))
- err = dev_mc_del(dev, addr);
- else
- err = -EINVAL;
- }
-
- return err;
-}
-
-static int igb_ndo_fdb_dump(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct net_device *dev,
- int idx)
-{
- struct igb_adapter *adapter = netdev_priv(dev);
-
- if (adapter->vfs_allocated_count)
- idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
-
- return idx;
-}
-#endif /* USE_DEFAULT_FDB_DEL_DUMP */
-
-#ifdef HAVE_BRIDGE_ATTRIBS
-#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
-static int igb_ndo_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh,
- u16 flags)
-#else
-static int igb_ndo_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh)
-#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- struct e1000_hw *hw = &adapter->hw;
- struct nlattr *attr, *br_spec;
- int rem;
-
- if (!(adapter->vfs_allocated_count))
- return -EOPNOTSUPP;
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- case e1000_i350:
- case e1000_i354:
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
-
- nla_for_each_nested(attr, br_spec, rem) {
- __u16 mode;
-
- if (nla_type(attr) != IFLA_BRIDGE_MODE)
- continue;
-
- mode = nla_get_u16(attr);
- if (mode == BRIDGE_MODE_VEPA) {
- e1000_vmdq_set_loopback_pf(hw, 0);
- adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE;
- } else if (mode == BRIDGE_MODE_VEB) {
- e1000_vmdq_set_loopback_pf(hw, 1);
- adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
- } else
- return -EINVAL;
-
- netdev_info(adapter->netdev, "enabling bridge mode: %s\n",
- mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
- }
-
- return 0;
-}
-
-#ifdef HAVE_BRIDGE_FILTER
-#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
-static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask,
- int nlflags)
-#else
-static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask)
-#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
-#else
-static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(dev);
- u16 mode;
-
- if (!(adapter->vfs_allocated_count))
- return -EOPNOTSUPP;
-
- if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE)
- mode = BRIDGE_MODE_VEB;
- else
- mode = BRIDGE_MODE_VEPA;
-
-#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
-#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0,
- nlflags, filter_mask, NULL);
-#else
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags);
-#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */
-#else
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0);
-#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
-#else
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
-#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */
-}
-#endif /* HAVE_BRIDGE_ATTRIBS */
-#endif /* NTF_SELF */
-
-#endif /* HAVE_NDO_SET_FEATURES */
-#ifdef HAVE_NET_DEVICE_OPS
-static const struct net_device_ops igb_netdev_ops = {
- .ndo_open = igb_open,
- .ndo_stop = igb_close,
- .ndo_start_xmit = igb_xmit_frame,
- .ndo_get_stats = igb_get_stats,
- .ndo_set_rx_mode = igb_set_rx_mode,
- .ndo_set_mac_address = igb_set_mac,
- .ndo_change_mtu = igb_change_mtu,
- .ndo_do_ioctl = igb_ioctl,
- .ndo_tx_timeout = igb_tx_timeout,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
-#ifdef IFLA_VF_MAX
- .ndo_set_vf_mac = igb_ndo_set_vf_mac,
- .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
-#ifdef HAVE_VF_MIN_MAX_TXRATE
- .ndo_set_vf_rate = igb_ndo_set_vf_bw,
-#else /* HAVE_VF_MIN_MAX_TXRATE */
- .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
-#endif /* HAVE_VF_MIN_MAX_TXRATE */
- .ndo_get_vf_config = igb_ndo_get_vf_config,
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
- .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk,
-#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
-#endif /* IFLA_VF_MAX */
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = igb_netpoll,
-#endif
-#ifdef HAVE_NDO_SET_FEATURES
- .ndo_fix_features = igb_fix_features,
- .ndo_set_features = igb_set_features,
-#endif
-#ifdef HAVE_VLAN_RX_REGISTER
- .ndo_vlan_rx_register = igb_vlan_mode,
-#endif
-#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB
-#ifdef NTF_SELF
- .ndo_fdb_add = igb_ndo_fdb_add,
-#ifndef USE_DEFAULT_FDB_DEL_DUMP
- .ndo_fdb_del = igb_ndo_fdb_del,
- .ndo_fdb_dump = igb_ndo_fdb_dump,
-#endif
-#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */
-#ifdef HAVE_BRIDGE_ATTRIBS
- .ndo_bridge_setlink = igb_ndo_bridge_setlink,
- .ndo_bridge_getlink = igb_ndo_bridge_getlink,
-#endif /* HAVE_BRIDGE_ATTRIBS */
-#endif
-};
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-static const struct net_device_ops igb_vmdq_ops = {
- .ndo_open = &igb_vmdq_open,
- .ndo_stop = &igb_vmdq_close,
- .ndo_start_xmit = &igb_vmdq_xmit_frame,
- .ndo_get_stats = &igb_vmdq_get_stats,
- .ndo_set_rx_mode = &igb_vmdq_set_rx_mode,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = &igb_vmdq_set_mac,
- .ndo_change_mtu = &igb_vmdq_change_mtu,
- .ndo_tx_timeout = &igb_vmdq_tx_timeout,
- .ndo_vlan_rx_register = &igb_vmdq_vlan_rx_register,
- .ndo_vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid,
-};
-
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
-#endif /* HAVE_NET_DEVICE_OPS */
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
-{
-#ifdef HAVE_NET_DEVICE_OPS
- vnetdev->netdev_ops = &igb_vmdq_ops;
-#else
- dev->open = &igb_vmdq_open;
- dev->stop = &igb_vmdq_close;
- dev->hard_start_xmit = &igb_vmdq_xmit_frame;
- dev->get_stats = &igb_vmdq_get_stats;
-#ifdef HAVE_SET_RX_MODE
- dev->set_rx_mode = &igb_vmdq_set_rx_mode;
-#endif
- dev->set_multicast_list = &igb_vmdq_set_rx_mode;
- dev->set_mac_address = &igb_vmdq_set_mac;
- dev->change_mtu = &igb_vmdq_change_mtu;
-#ifdef HAVE_TX_TIMEOUT
- dev->tx_timeout = &igb_vmdq_tx_timeout;
-#endif
-#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX)
- dev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
- dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
- dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
-#endif
-#endif
- igb_vmdq_set_ethtool_ops(vnetdev);
- vnetdev->watchdog_timeo = 5 * HZ;
-
-}
-
-int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
-{
- int pool, err = 0, base_queue;
- struct net_device *vnetdev;
- struct igb_vmdq_adapter *vmdq_adapter;
-
- for (pool = 1; pool < adapter->vmdq_pools; pool++) {
- int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
- base_queue = pool * qpp;
- vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
- if (!vnetdev) {
- err = -ENOMEM;
- break;
- }
- vmdq_adapter = netdev_priv(vnetdev);
- vmdq_adapter->vnetdev = vnetdev;
- vmdq_adapter->real_adapter = adapter;
- vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
- vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
- igb_assign_vmdq_netdev_ops(vnetdev);
- snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
- adapter->netdev->name, pool);
- vnetdev->features = adapter->netdev->features;
-#ifdef HAVE_NETDEV_VLAN_FEATURES
- vnetdev->vlan_features = adapter->netdev->vlan_features;
-#endif
- adapter->vmdq_netdev[pool-1] = vnetdev;
- err = register_netdev(vnetdev);
- if (err)
- break;
- }
- return err;
-}
-
-int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
-{
- int pool, err = 0;
-
- for (pool = 1; pool < adapter->vmdq_pools; pool++) {
- unregister_netdev(adapter->vmdq_netdev[pool-1]);
- free_netdev(adapter->vmdq_netdev[pool-1]);
- adapter->vmdq_netdev[pool-1] = NULL;
- }
- return err;
-}
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
-
-/**
- * igb_set_fw_version - Configure version string for ethtool
- * @adapter: adapter struct
- *
- **/
-static void igb_set_fw_version(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct e1000_fw_version fw;
-
- e1000_get_fw_version(hw, &fw);
-
- switch (hw->mac.type) {
- case e1000_i210:
- case e1000_i211:
- if (!(e1000_get_flash_presence_i210(hw))) {
- snprintf(adapter->fw_version,
- sizeof(adapter->fw_version),
- "%2d.%2d-%d",
- fw.invm_major, fw.invm_minor, fw.invm_img_type);
- break;
- }
- /* fall through */
- default:
- /* if option rom is valid, display its version too */
- if (fw.or_valid) {
- snprintf(adapter->fw_version,
- sizeof(adapter->fw_version),
- "%d.%d, 0x%08x, %d.%d.%d",
- fw.eep_major, fw.eep_minor, fw.etrack_id,
- fw.or_major, fw.or_build, fw.or_patch);
- /* no option rom */
- } else {
- if (fw.etrack_id != 0x0000) {
- snprintf(adapter->fw_version,
- sizeof(adapter->fw_version),
- "%d.%d, 0x%08x",
- fw.eep_major, fw.eep_minor, fw.etrack_id);
- } else {
- snprintf(adapter->fw_version,
- sizeof(adapter->fw_version),
- "%d.%d.%d",
- fw.eep_major, fw.eep_minor, fw.eep_build);
- }
- }
- break;
- }
-
- return;
-}
-
-/**
- * igb_init_mas - init Media Autosense feature if enabled in the NVM
- *
- * @adapter: adapter struct
- **/
-static void igb_init_mas(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u16 eeprom_data;
-
- e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data);
- switch (hw->bus.func) {
- case E1000_FUNC_0:
- if (eeprom_data & IGB_MAS_ENABLE_0)
- adapter->flags |= IGB_FLAG_MAS_ENABLE;
- break;
- case E1000_FUNC_1:
- if (eeprom_data & IGB_MAS_ENABLE_1)
- adapter->flags |= IGB_FLAG_MAS_ENABLE;
- break;
- case E1000_FUNC_2:
- if (eeprom_data & IGB_MAS_ENABLE_2)
- adapter->flags |= IGB_FLAG_MAS_ENABLE;
- break;
- case E1000_FUNC_3:
- if (eeprom_data & IGB_MAS_ENABLE_3)
- adapter->flags |= IGB_FLAG_MAS_ENABLE;
- break;
- default:
- /* Shouldn't get here */
- dev_err(pci_dev_to_dev(adapter->pdev),
- "%s:AMS: Invalid port configuration, returning\n",
- adapter->netdev->name);
- break;
- }
-}
-
-/**
- * igb_probe - Device Initialization Routine
- * @pdev: PCI device information struct
- * @ent: entry in igb_pci_tbl
- *
- * Returns 0 on success, negative on failure
- *
- * igb_probe initializes an adapter identified by a pci_dev structure.
- * The OS initialization, configuring of the adapter private structure,
- * and a hardware reset occur.
- **/
-static int __devinit igb_probe(struct pci_dev *pdev,
- const struct pci_device_id *ent)
-{
- struct net_device *netdev;
- struct igb_adapter *adapter;
- struct e1000_hw *hw;
- u16 eeprom_data = 0;
- u8 pba_str[E1000_PBANUM_LENGTH];
- s32 ret_val;
- static int global_quad_port_a; /* global quad port a indication */
- int i, err, pci_using_dac;
- static int cards_found;
-
- err = pci_enable_device_mem(pdev);
- if (err)
- return err;
-
- pci_using_dac = 0;
- err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
- if (!err) {
- err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
- if (!err)
- pci_using_dac = 1;
- } else {
- err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
- if (err) {
- err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
- if (err) {
- IGB_ERR("No usable DMA configuration, "
- "aborting\n");
- goto err_dma;
- }
- }
- }
-
-#ifndef HAVE_ASPM_QUIRKS
- /* 82575 requires that the pci-e link partner disable the L0s state */
- switch (pdev->device) {
- case E1000_DEV_ID_82575EB_COPPER:
- case E1000_DEV_ID_82575EB_FIBER_SERDES:
- case E1000_DEV_ID_82575GB_QUAD_COPPER:
- pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
- default:
- break;
- }
-
-#endif /* HAVE_ASPM_QUIRKS */
- err = pci_request_selected_regions(pdev,
- pci_select_bars(pdev,
- IORESOURCE_MEM),
- igb_driver_name);
- if (err)
- goto err_pci_reg;
-
- pci_enable_pcie_error_reporting(pdev);
-
- pci_set_master(pdev);
-
- err = -ENOMEM;
-#ifdef HAVE_TX_MQ
- netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
- IGB_MAX_TX_QUEUES);
-#else
- netdev = alloc_etherdev(sizeof(struct igb_adapter));
-#endif /* HAVE_TX_MQ */
- if (!netdev)
- goto err_alloc_etherdev;
-
- SET_MODULE_OWNER(netdev);
- SET_NETDEV_DEV(netdev, &pdev->dev);
-
- pci_set_drvdata(pdev, netdev);
- adapter = netdev_priv(netdev);
- adapter->netdev = netdev;
- adapter->pdev = pdev;
- hw = &adapter->hw;
- hw->back = adapter;
- adapter->port_num = hw->bus.func;
- adapter->msg_enable = (1 << debug) - 1;
-
-#ifdef HAVE_PCI_ERS
- err = pci_save_state(pdev);
- if (err)
- goto err_ioremap;
-#endif
- err = -EIO;
- hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!hw->hw_addr)
- goto err_ioremap;
-
-#ifdef HAVE_NET_DEVICE_OPS
- netdev->netdev_ops = &igb_netdev_ops;
-#else /* HAVE_NET_DEVICE_OPS */
- netdev->open = &igb_open;
- netdev->stop = &igb_close;
- netdev->get_stats = &igb_get_stats;
-#ifdef HAVE_SET_RX_MODE
- netdev->set_rx_mode = &igb_set_rx_mode;
-#endif
- netdev->set_multicast_list = &igb_set_rx_mode;
- netdev->set_mac_address = &igb_set_mac;
- netdev->change_mtu = &igb_change_mtu;
- netdev->do_ioctl = &igb_ioctl;
-#ifdef HAVE_TX_TIMEOUT
- netdev->tx_timeout = &igb_tx_timeout;
-#endif
- netdev->vlan_rx_register = igb_vlan_mode;
- netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
- netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
- netdev->poll_controller = igb_netpoll;
-#endif
- netdev->hard_start_xmit = &igb_xmit_frame;
-#endif /* HAVE_NET_DEVICE_OPS */
- igb_set_ethtool_ops(netdev);
-#ifdef HAVE_TX_TIMEOUT
- netdev->watchdog_timeo = 5 * HZ;
-#endif
-
- strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
-
- adapter->bd_number = cards_found;
-
- /* setup the private structure */
- err = igb_sw_init(adapter);
- if (err)
- goto err_sw_init;
-
- e1000_get_bus_info(hw);
-
- hw->phy.autoneg_wait_to_complete = FALSE;
- hw->mac.adaptive_ifs = FALSE;
-
- /* Copper options */
- if (hw->phy.media_type == e1000_media_type_copper) {
- hw->phy.mdix = AUTO_ALL_MODES;
- hw->phy.disable_polarity_correction = FALSE;
- hw->phy.ms_type = e1000_ms_hw_default;
- }
-
- if (e1000_check_reset_block(hw))
- dev_info(pci_dev_to_dev(pdev),
- "PHY reset is blocked due to SOL/IDER session.\n");
-
- /*
- * features is initialized to 0 in allocation, it might have bits
- * set by igb_sw_init so we should use an or instead of an
- * assignment.
- */
- netdev->features |= NETIF_F_SG |
- NETIF_F_IP_CSUM |
-#ifdef NETIF_F_IPV6_CSUM
- NETIF_F_IPV6_CSUM |
-#endif
-#ifdef NETIF_F_TSO
- NETIF_F_TSO |
-#ifdef NETIF_F_TSO6
- NETIF_F_TSO6 |
-#endif
-#endif /* NETIF_F_TSO */
-#ifdef NETIF_F_RXHASH
- NETIF_F_RXHASH |
-#endif
- NETIF_F_RXCSUM |
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_TX;
-#else
- NETIF_F_HW_VLAN_RX |
- NETIF_F_HW_VLAN_TX;
-#endif
-
- if (hw->mac.type >= e1000_82576)
- netdev->features |= NETIF_F_SCTP_CSUM;
-
-#ifdef HAVE_NDO_SET_FEATURES
- /* copy netdev features into list of user selectable features */
- netdev->hw_features |= netdev->features;
-#ifndef IGB_NO_LRO
-
- /* give us the option of enabling LRO later */
- netdev->hw_features |= NETIF_F_LRO;
-#endif
-#else
-#ifdef NETIF_F_GRO
-
- /* this is only needed on kernels prior to 2.6.39 */
- netdev->features |= NETIF_F_GRO;
-#endif
-#endif
-
- /* set this bit last since it cannot be part of hw_features */
-#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
- netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#else
- netdev->features |= NETIF_F_HW_VLAN_FILTER;
-#endif
-
-#ifdef HAVE_NETDEV_VLAN_FEATURES
- netdev->vlan_features |= NETIF_F_TSO |
- NETIF_F_TSO6 |
- NETIF_F_IP_CSUM |
- NETIF_F_IPV6_CSUM |
- NETIF_F_SG;
-
-#endif
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
-
- adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
-#ifdef DEBUG
- if (adapter->dmac != IGB_DMAC_DISABLE)
- printk("%s: DMA Coalescing is enabled..\n", netdev->name);
-#endif
-
- /* before reading the NVM, reset the controller to put the device in a
- * known good starting state */
- e1000_reset_hw(hw);
-
- /* make sure the NVM is good */
- if (e1000_validate_nvm_checksum(hw) < 0) {
- dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
- " Valid\n");
- err = -EIO;
- goto err_eeprom;
- }
-
- /* copy the MAC address out of the NVM */
- if (e1000_read_mac_addr(hw))
- dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
- memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
-#ifdef ETHTOOL_GPERMADDR
- memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
-
- if (!is_valid_ether_addr(netdev->perm_addr)) {
-#else
- if (!is_valid_ether_addr(netdev->dev_addr)) {
-#endif
- dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
- err = -EIO;
- goto err_eeprom;
- }
-
- memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
- adapter->mac_table[0].queue = adapter->vfs_allocated_count;
- adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
- igb_rar_set(adapter, 0);
-
- /* get firmware version for ethtool -i */
- igb_set_fw_version(adapter);
-
- /* Check if Media Autosense is enabled */
- if (hw->mac.type == e1000_82580)
- igb_init_mas(adapter);
-#ifdef HAVE_TIMER_SETUP
- timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0);
- timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0);
-#else
- setup_timer(&adapter->watchdog_timer, &igb_watchdog,
- (unsigned long) adapter);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
- (unsigned long) adapter);
- setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
- (unsigned long) adapter);
-#endif
-
- INIT_WORK(&adapter->reset_task, igb_reset_task);
- INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
-
- /* Initialize link properties that are user-changeable */
- adapter->fc_autoneg = true;
- hw->mac.autoneg = true;
- hw->phy.autoneg_advertised = 0x2f;
-
- hw->fc.requested_mode = e1000_fc_default;
- hw->fc.current_mode = e1000_fc_default;
-
- e1000_validate_mdi_setting(hw);
-
- /* By default, support wake on port A */
- if (hw->bus.func == 0)
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
-
- /* Check the NVM for wake support for non-port A ports */
- if (hw->mac.type >= e1000_82580)
- hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
- NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
- &eeprom_data);
- else if (hw->bus.func == 1)
- e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
-
- if (eeprom_data & IGB_EEPROM_APME)
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
-
- /* now that we have the eeprom settings, apply the special cases where
- * the eeprom may be wrong or the board simply won't support wake on
- * lan on a particular port */
- switch (pdev->device) {
- case E1000_DEV_ID_82575GB_QUAD_COPPER:
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- case E1000_DEV_ID_82575EB_FIBER_SERDES:
- case E1000_DEV_ID_82576_FIBER:
- case E1000_DEV_ID_82576_SERDES:
- /* Wake events only supported on port A for dual fiber
- * regardless of eeprom setting */
- if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- case E1000_DEV_ID_82576_QUAD_COPPER:
- case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
- /* if quad port adapter, disable WoL on all but port A */
- if (global_quad_port_a != 0)
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- else
- adapter->flags |= IGB_FLAG_QUAD_PORT_A;
- /* Reset for multiple quad port adapters */
- if (++global_quad_port_a == 4)
- global_quad_port_a = 0;
- break;
- default:
- /* If the device can't wake, don't set software support */
- if (!device_can_wakeup(&adapter->pdev->dev))
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- }
-
- /* initialize the wol settings based on the eeprom settings */
- if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
- adapter->wol |= E1000_WUFC_MAG;
-
- /* Some vendors want WoL disabled by default, but still supported */
- if ((hw->mac.type == e1000_i350) &&
- (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
- adapter->wol = 0;
- }
-
- device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
- adapter->flags & IGB_FLAG_WOL_SUPPORTED);
-
- /* reset the hardware with the new settings */
- igb_reset(adapter);
- adapter->devrc = 0;
-
-#ifdef HAVE_I2C_SUPPORT
- /* Init the I2C interface */
- err = igb_init_i2c(adapter);
- if (err) {
- dev_err(&pdev->dev, "failed to init i2c interface\n");
- goto err_eeprom;
- }
-#endif /* HAVE_I2C_SUPPORT */
-
- /* let the f/w know that the h/w is now under the control of the
- * driver. */
- igb_get_hw_control(adapter);
-
- strncpy(netdev->name, "eth%d", IFNAMSIZ);
- err = register_netdev(netdev);
- if (err)
- goto err_register;
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- err = igb_init_vmdq_netdevs(adapter);
- if (err)
- goto err_register;
-#endif
- /* carrier off reporting is important to ethtool even BEFORE open */
- netif_carrier_off(netdev);
-
-#ifdef IGB_DCA
- if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
- adapter->flags |= IGB_FLAG_DCA_ENABLED;
- dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
- igb_setup_dca(adapter);
- }
-
-#endif
-#ifdef HAVE_PTP_1588_CLOCK
- /* do hw tstamp init after resetting */
- igb_ptp_init(adapter);
-#endif /* HAVE_PTP_1588_CLOCK */
-
- dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
- /* print bus type/speed/width info */
- dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
- netdev->name,
- ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
- (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
- (hw->mac.type == e1000_i354) ? "integrated" :
- "unknown"),
- ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
- (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
- (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
- (hw->mac.type == e1000_i354) ? "integrated" :
- "unknown"));
- dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
- for (i = 0; i < 6; i++)
- printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
-
- ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
- if (ret_val)
- strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
- dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
- pba_str);
-
-
- /* Initialize the thermal sensor on i350 devices. */
- if (hw->mac.type == e1000_i350) {
- if (hw->bus.func == 0) {
- u16 ets_word;
-
- /*
- * Read the NVM to determine if this i350 device
- * supports an external thermal sensor.
- */
- e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
- if (ets_word != 0x0000 && ets_word != 0xFFFF)
- adapter->ets = true;
- else
- adapter->ets = false;
- }
-#ifdef IGB_HWMON
-
- igb_sysfs_init(adapter);
-#else
-#ifdef IGB_PROCFS
-
- igb_procfs_init(adapter);
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
- } else {
- adapter->ets = false;
- }
-
- if (hw->phy.media_type == e1000_media_type_copper) {
- switch (hw->mac.type) {
- case e1000_i350:
- case e1000_i210:
- case e1000_i211:
- /* Enable EEE for internal copper PHY devices */
- err = e1000_set_eee_i350(hw);
- if (!err &&
- (adapter->flags & IGB_FLAG_EEE))
- adapter->eee_advert =
- MDIO_EEE_100TX | MDIO_EEE_1000T;
- break;
- case e1000_i354:
- if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
- (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
- err = e1000_set_eee_i354(hw);
- if ((!err) &&
- (adapter->flags & IGB_FLAG_EEE))
- adapter->eee_advert =
- MDIO_EEE_100TX | MDIO_EEE_1000T;
- }
- break;
- default:
- break;
- }
- }
-
- /* send driver version info to firmware */
- if (hw->mac.type >= e1000_i350)
- igb_init_fw(adapter);
-
-#ifndef IGB_NO_LRO
- if (netdev->features & NETIF_F_LRO)
- dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
- else
- dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
-#endif
- dev_info(pci_dev_to_dev(pdev),
- "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
- adapter->msix_entries ? "MSI-X" :
- (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
- adapter->num_rx_queues, adapter->num_tx_queues);
-
- cards_found++;
-
- pm_runtime_put_noidle(&pdev->dev);
- return 0;
-
-err_register:
- igb_release_hw_control(adapter);
-#ifdef HAVE_I2C_SUPPORT
- memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
-#endif /* HAVE_I2C_SUPPORT */
-err_eeprom:
- if (!e1000_check_reset_block(hw))
- e1000_phy_hw_reset(hw);
-
- if (hw->flash_address)
- iounmap(hw->flash_address);
-err_sw_init:
- igb_clear_interrupt_scheme(adapter);
- igb_reset_sriov_capability(adapter);
- iounmap(hw->hw_addr);
-err_ioremap:
- free_netdev(netdev);
-err_alloc_etherdev:
- pci_release_selected_regions(pdev,
- pci_select_bars(pdev, IORESOURCE_MEM));
-err_pci_reg:
-err_dma:
- pci_disable_device(pdev);
- return err;
-}
-#ifdef HAVE_I2C_SUPPORT
-/*
- * igb_remove_i2c - Cleanup I2C interface
- * @adapter: pointer to adapter structure
- *
- */
-static void igb_remove_i2c(struct igb_adapter *adapter)
-{
-
- /* free the adapter bus structure */
- i2c_del_adapter(&adapter->i2c_adap);
-}
-#endif /* HAVE_I2C_SUPPORT */
-
-/**
- * igb_remove - Device Removal Routine
- * @pdev: PCI device information struct
- *
- * igb_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device. The could be caused by a
- * Hot-Plug event, or because the driver is going to be removed from
- * memory.
- **/
-static void __devexit igb_remove(struct pci_dev *pdev)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-
- pm_runtime_get_noresume(&pdev->dev);
-#ifdef HAVE_I2C_SUPPORT
- igb_remove_i2c(adapter);
-#endif /* HAVE_I2C_SUPPORT */
-#ifdef HAVE_PTP_1588_CLOCK
- igb_ptp_stop(adapter);
-#endif /* HAVE_PTP_1588_CLOCK */
-
- /* flush_scheduled_work() may reschedule our watchdog task, so
- * explicitly prevent the watchdog tasks from being rescheduled */
- set_bit(__IGB_DOWN, &adapter->state);
- del_timer_sync(&adapter->watchdog_timer);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- del_timer_sync(&adapter->dma_err_timer);
- del_timer_sync(&adapter->phy_info_timer);
-
- flush_scheduled_work();
-
-#ifdef IGB_DCA
- if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
- dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
- dca_remove_requester(&pdev->dev);
- adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
- E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
- }
-#endif
-
- /* Release control of h/w to f/w. If f/w is AMT enabled, this
- * would have already happened in close and is redundant. */
- igb_release_hw_control(adapter);
-
- unregister_netdev(netdev);
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- igb_remove_vmdq_netdevs(adapter);
-#endif
-
- igb_clear_interrupt_scheme(adapter);
- igb_reset_sriov_capability(adapter);
-
- iounmap(hw->hw_addr);
- if (hw->flash_address)
- iounmap(hw->flash_address);
- pci_release_selected_regions(pdev,
- pci_select_bars(pdev, IORESOURCE_MEM));
-
-#ifdef IGB_HWMON
- igb_sysfs_exit(adapter);
-#else
-#ifdef IGB_PROCFS
- igb_procfs_exit(adapter);
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
- kfree(adapter->mac_table);
- kfree(adapter->shadow_vfta);
- free_netdev(netdev);
-
- pci_disable_pcie_error_reporting(pdev);
-
- pci_disable_device(pdev);
-}
-
-/**
- * igb_sw_init - Initialize general software structures (struct igb_adapter)
- * @adapter: board private structure to initialize
- *
- * igb_sw_init initializes the Adapter private data structure.
- * Fields are initialized based on PCI device information and
- * OS network device settings (MTU size).
- **/
-static int igb_sw_init(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- struct pci_dev *pdev = adapter->pdev;
-
- /* PCI config space info */
-
- hw->vendor_id = pdev->vendor;
- hw->device_id = pdev->device;
- hw->subsystem_vendor_id = pdev->subsystem_vendor;
- hw->subsystem_device_id = pdev->subsystem_device;
-
- pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
-
- pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
-
- /* set default ring sizes */
- adapter->tx_ring_count = IGB_DEFAULT_TXD;
- adapter->rx_ring_count = IGB_DEFAULT_RXD;
-
- /* set default work limits */
- adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
-
- adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
- VLAN_HLEN;
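-	/* e.g. with the default 1500 byte MTU this is 1500 + 14 + 4 + 4 = 1522 */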
-
- /* Initialize the hardware-specific values */
- if (e1000_setup_init_funcs(hw, TRUE)) {
- dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
- return -EIO;
- }
-
- adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
- hw->mac.rar_entry_count,
- GFP_ATOMIC);
-
- /* Setup and initialize a copy of the hw vlan table array */
- adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
- GFP_ATOMIC);
-#ifdef NO_KNI
- /* These calls may decrease the number of queues */
- if (hw->mac.type < e1000_i210) {
- igb_set_sriov_capability(adapter);
- }
-
- if (igb_init_interrupt_scheme(adapter, true)) {
- dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
- return -ENOMEM;
- }
-
- /* Explicitly disable IRQ since the NIC can be in any state. */
- igb_irq_disable(adapter);
-
- set_bit(__IGB_DOWN, &adapter->state);
-#endif
- return 0;
-}
-
-/**
- * igb_open - Called when a network interface is made active
- * @netdev: network interface device structure
- *
- * Returns 0 on success, negative value on failure
- *
- * The open entry point is called when a network interface is made
- * active by the system (IFF_UP). At this point all resources needed
- * for transmit and receive operations are allocated, the interrupt
- * handler is registered with the OS, the watchdog timer is started,
- * and the stack is notified that the interface is ready.
- **/
-static int __igb_open(struct net_device *netdev, bool resuming)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-#ifdef CONFIG_PM_RUNTIME
- struct pci_dev *pdev = adapter->pdev;
-#endif /* CONFIG_PM_RUNTIME */
- int err;
- int i;
-
- /* disallow open during test */
- if (test_bit(__IGB_TESTING, &adapter->state)) {
- WARN_ON(resuming);
- return -EBUSY;
- }
-
-#ifdef CONFIG_PM_RUNTIME
- if (!resuming)
- pm_runtime_get_sync(&pdev->dev);
-#endif /* CONFIG_PM_RUNTIME */
-
- netif_carrier_off(netdev);
-
- /* allocate transmit descriptors */
- err = igb_setup_all_tx_resources(adapter);
- if (err)
- goto err_setup_tx;
-
- /* allocate receive descriptors */
- err = igb_setup_all_rx_resources(adapter);
- if (err)
- goto err_setup_rx;
-
- igb_power_up_link(adapter);
-
- /* before we allocate an interrupt, we must be ready to handle it.
- * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
- * as soon as we call pci_request_irq, so we have to setup our
- * clean_rx handler before we do so. */
- igb_configure(adapter);
-
- err = igb_request_irq(adapter);
- if (err)
- goto err_req_irq;
-
- /* Notify the stack of the actual queue counts. */
- netif_set_real_num_tx_queues(netdev,
- adapter->vmdq_pools ? 1 :
- adapter->num_tx_queues);
-
- err = netif_set_real_num_rx_queues(netdev,
- adapter->vmdq_pools ? 1 :
- adapter->num_rx_queues);
- if (err)
- goto err_set_queues;
-
- /* From here on the code is the same as igb_up() */
- clear_bit(__IGB_DOWN, &adapter->state);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&(adapter->q_vector[i]->napi));
- igb_configure_lli(adapter);
-
- /* Clear any pending interrupts. */
- E1000_READ_REG(hw, E1000_ICR);
-
- igb_irq_enable(adapter);
-
- /* notify VFs that reset has been completed */
- if (adapter->vfs_allocated_count) {
- u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
- reg_data |= E1000_CTRL_EXT_PFRSTD;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
- }
-
- netif_tx_start_all_queues(netdev);
-
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- schedule_work(&adapter->dma_err_task);
-
- /* start the watchdog. */
- hw->mac.get_link_status = 1;
- schedule_work(&adapter->watchdog_task);
-
- return E1000_SUCCESS;
-
-err_set_queues:
- igb_free_irq(adapter);
-err_req_irq:
- igb_release_hw_control(adapter);
- igb_power_down_link(adapter);
- igb_free_all_rx_resources(adapter);
-err_setup_rx:
- igb_free_all_tx_resources(adapter);
-err_setup_tx:
- igb_reset(adapter);
-
-#ifdef CONFIG_PM_RUNTIME
- if (!resuming)
- pm_runtime_put(&pdev->dev);
-#endif /* CONFIG_PM_RUNTIME */
-
- return err;
-}
-
-static int igb_open(struct net_device *netdev)
-{
- return __igb_open(netdev, false);
-}
-
-/**
- * igb_close - Disables a network interface
- * @netdev: network interface device structure
- *
- * Returns 0, this is not allowed to fail
- *
- * The close entry point is called when an interface is de-activated
- * by the OS. The hardware is still under the driver's control, but
- * needs to be disabled. A global MAC reset is issued to stop the
- * hardware, and all transmit and receive resources are freed.
- **/
-static int __igb_close(struct net_device *netdev, bool suspending)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-#ifdef CONFIG_PM_RUNTIME
- struct pci_dev *pdev = adapter->pdev;
-#endif /* CONFIG_PM_RUNTIME */
-
- WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
-
-#ifdef CONFIG_PM_RUNTIME
- if (!suspending)
- pm_runtime_get_sync(&pdev->dev);
-#endif /* CONFIG_PM_RUNTIME */
-
- igb_down(adapter);
-
- igb_release_hw_control(adapter);
-
- igb_free_irq(adapter);
-
- igb_free_all_tx_resources(adapter);
- igb_free_all_rx_resources(adapter);
-
-#ifdef CONFIG_PM_RUNTIME
- if (!suspending)
- pm_runtime_put_sync(&pdev->dev);
-#endif /* CONFIG_PM_RUNTIME */
-
- return 0;
-}
-
-static int igb_close(struct net_device *netdev)
-{
- return __igb_close(netdev, false);
-}
-
-/**
- * igb_setup_tx_resources - allocate Tx resources (Descriptors)
- * @tx_ring: tx descriptor ring (for a specific queue) to setup
- *
- * Return 0 on success, negative on failure
- **/
-int igb_setup_tx_resources(struct igb_ring *tx_ring)
-{
- struct device *dev = tx_ring->dev;
- int size;
-
- size = sizeof(struct igb_tx_buffer) * tx_ring->count;
- tx_ring->tx_buffer_info = vzalloc(size);
- if (!tx_ring->tx_buffer_info)
- goto err;
-
- /* round up to nearest 4K */
- tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
- tx_ring->size = ALIGN(tx_ring->size, 4096);
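-	/* e.g. 256 descriptors * 16 bytes per advanced descriptor = 4096 bytes */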
-
- tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
- &tx_ring->dma, GFP_KERNEL);
-
- if (!tx_ring->desc)
- goto err;
-
- tx_ring->next_to_use = 0;
- tx_ring->next_to_clean = 0;
-
- return 0;
-
-err:
- vfree(tx_ring->tx_buffer_info);
- dev_err(dev,
- "Unable to allocate memory for the transmit descriptor ring\n");
- return -ENOMEM;
-}
-
-/**
- * igb_setup_all_tx_resources - wrapper to allocate Tx resources
- * (Descriptors) for all queues
- * @adapter: board private structure
- *
- * Return 0 on success, negative on failure
- **/
-static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
-{
- struct pci_dev *pdev = adapter->pdev;
- int i, err = 0;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- err = igb_setup_tx_resources(adapter->tx_ring[i]);
- if (err) {
- dev_err(pci_dev_to_dev(pdev),
- "Allocation for Tx Queue %u failed\n", i);
- for (i--; i >= 0; i--)
- igb_free_tx_resources(adapter->tx_ring[i]);
- break;
- }
- }
-
- return err;
-}
-
-/**
- * igb_setup_tctl - configure the transmit control registers
- * @adapter: Board private structure
- **/
-void igb_setup_tctl(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 tctl;
-
- /* disable queue 0 which is enabled by default on 82575 and 82576 */
- E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);
-
- /* Program the Transmit Control Register */
- tctl = E1000_READ_REG(hw, E1000_TCTL);
- tctl &= ~E1000_TCTL_CT;
- tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
- (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
-
- e1000_config_collision_dist(hw);
-
- /* Enable transmits */
- tctl |= E1000_TCTL_EN;
-
- E1000_WRITE_REG(hw, E1000_TCTL, tctl);
-}
-
-static u32 igb_tx_wthresh(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- switch (hw->mac.type) {
- case e1000_i354:
- return 4;
- case e1000_82576:
- if (adapter->msix_entries)
- return 1;
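-		/* fall through */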
- default:
- break;
- }
-
- return 16;
-}
-
-/**
- * igb_configure_tx_ring - Configure transmit ring after Reset
- * @adapter: board private structure
- * @ring: tx ring to configure
- *
- * Configure a transmit ring after a reset.
- **/
-void igb_configure_tx_ring(struct igb_adapter *adapter,
- struct igb_ring *ring)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 txdctl = 0;
- u64 tdba = ring->dma;
- int reg_idx = ring->reg_idx;
-
- /* disable the queue */
- E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
- E1000_WRITE_FLUSH(hw);
- mdelay(10);
-
- E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
- ring->count * sizeof(union e1000_adv_tx_desc));
- E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
- tdba & 0x00000000ffffffffULL);
- E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);
-
- ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
- E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
- writel(0, ring->tail);
-
- txdctl |= IGB_TX_PTHRESH;
- txdctl |= IGB_TX_HTHRESH << 8;
- txdctl |= igb_tx_wthresh(adapter) << 16;
-
- txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
- E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
-}
-
-/**
- * igb_configure_tx - Configure transmit Unit after Reset
- * @adapter: board private structure
- *
- * Configure the Tx unit of the MAC after a reset.
- **/
-static void igb_configure_tx(struct igb_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
-}
-
-/**
- * igb_setup_rx_resources - allocate Rx resources (Descriptors)
- * @rx_ring: rx descriptor ring (for a specific queue) to setup
- *
- * Returns 0 on success, negative on failure
- **/
-int igb_setup_rx_resources(struct igb_ring *rx_ring)
-{
- struct device *dev = rx_ring->dev;
- int size, desc_len;
-
- size = sizeof(struct igb_rx_buffer) * rx_ring->count;
- rx_ring->rx_buffer_info = vzalloc(size);
- if (!rx_ring->rx_buffer_info)
- goto err;
-
- desc_len = sizeof(union e1000_adv_rx_desc);
-
- /* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * desc_len;
- rx_ring->size = ALIGN(rx_ring->size, 4096);
-
- rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
- &rx_ring->dma, GFP_KERNEL);
-
- if (!rx_ring->desc)
- goto err;
-
- rx_ring->next_to_alloc = 0;
- rx_ring->next_to_clean = 0;
- rx_ring->next_to_use = 0;
-
- return 0;
-
-err:
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
-	dev_err(dev,
-		"Unable to allocate memory for the receive descriptor ring\n");
- return -ENOMEM;
-}
-
-/**
- * igb_setup_all_rx_resources - wrapper to allocate Rx resources
- * (Descriptors) for all queues
- * @adapter: board private structure
- *
- * Return 0 on success, negative on failure
- **/
-static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
-{
- struct pci_dev *pdev = adapter->pdev;
- int i, err = 0;
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- err = igb_setup_rx_resources(adapter->rx_ring[i]);
- if (err) {
- dev_err(pci_dev_to_dev(pdev),
- "Allocation for Rx Queue %u failed\n", i);
- for (i--; i >= 0; i--)
- igb_free_rx_resources(adapter->rx_ring[i]);
- break;
- }
- }
-
- return err;
-}
-
-/**
- * igb_setup_mrqc - configure the multiple receive queue control registers
- * @adapter: Board private structure
- **/
-static void igb_setup_mrqc(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 mrqc, rxcsum;
- u32 j, num_rx_queues, shift = 0, shift2 = 0;
- static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
- 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
- 0xA32DCB77, 0x0CF23080, 0x3BB7426A,
- 0xFA01ACBE };
-
- /* Fill out hash function seeds */
- for (j = 0; j < 10; j++)
- E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]);
-
- num_rx_queues = adapter->rss_queues;
-
-	/* 82575 and 82576 support 2 RSS queues for VMDq */
- switch (hw->mac.type) {
- case e1000_82575:
- if (adapter->vmdq_pools) {
- shift = 2;
- shift2 = 6;
- break;
- }
- shift = 6;
- break;
- case e1000_82576:
- /* 82576 supports 2 RSS queues for SR-IOV */
- if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
- shift = 3;
- num_rx_queues = 2;
- }
- break;
- default:
- break;
- }
-
- /*
- * Populate the redirection table 4 entries at a time. To do this
- * we are generating the results for n and n+2 and then interleaving
-	 * those with the results for n+1 and n+3.
- */
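-	/*
-	 * For example, with num_rx_queues = 4 and shift = 0 each of the 128
-	 * table entries n ends up holding (n * 4) >> 7, so entries 0-31 map
-	 * to queue 0, entries 32-63 to queue 1, and so on.
-	 */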
- for (j = 0; j < 32; j++) {
- /* first pass generates n and n+2 */
- u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
- u32 reta = (base & 0x07800780) >> (7 - shift);
-
- /* second pass generates n+1 and n+3 */
- base += 0x00010001 * num_rx_queues;
- reta |= (base & 0x07800780) << (1 + shift);
-
- /* generate 2nd table for 82575 based parts */
- if (shift2)
- reta |= (0x01010101 * num_rx_queues) << shift2;
-
- E1000_WRITE_REG(hw, E1000_RETA(j), reta);
- }
-
- /*
- * Disable raw packet checksumming so that RSS hash is placed in
- * descriptor on writeback. No need to enable TCP/UDP/IP checksum
- * offloads as they are enabled by default
- */
- rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- rxcsum |= E1000_RXCSUM_PCSD;
-
- if (adapter->hw.mac.type >= e1000_82576)
- /* Enable Receive Checksum Offload for SCTP */
- rxcsum |= E1000_RXCSUM_CRCOFL;
-
- /* Don't need to set TUOFL or IPOFL, they default to 1 */
- E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
-
- /* Generate RSS hash based on packet types, TCP/UDP
- * port numbers and/or IPv4/v6 src and dst addresses
- */
- mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
- E1000_MRQC_RSS_FIELD_IPV4_TCP |
- E1000_MRQC_RSS_FIELD_IPV6 |
- E1000_MRQC_RSS_FIELD_IPV6_TCP |
- E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
-
- if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
- mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
- if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
- mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
-
- /* If VMDq is enabled then we set the appropriate mode for that, else
- * we default to RSS so that an RSS hash is calculated per packet even
- * if we are only using one queue */
- if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
- if (hw->mac.type > e1000_82575) {
- /* Set the default pool for the PF's first queue */
- u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);
- vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
- E1000_VT_CTL_DISABLE_DEF_POOL);
- vtctl |= adapter->vfs_allocated_count <<
- E1000_VT_CTL_DEFAULT_POOL_SHIFT;
- E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
- } else if (adapter->rss_queues > 1) {
- /* set default queue for pool 1 to queue 2 */
- E1000_WRITE_REG(hw, E1000_VT_CTL,
- adapter->rss_queues << 7);
- }
- if (adapter->rss_queues > 1)
- mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
- else
- mrqc |= E1000_MRQC_ENABLE_VMDQ;
- } else {
- mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
- }
- igb_vmm_control(adapter);
-
- E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
-}
-
-/**
- * igb_setup_rctl - configure the receive control registers
- * @adapter: Board private structure
- **/
-void igb_setup_rctl(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 rctl;
-
- rctl = E1000_READ_REG(hw, E1000_RCTL);
-
- rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
- rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
-
- rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
- (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
-
- /*
- * enable stripping of CRC. It's unlikely this will break BMC
- * redirection as it did with e1000. Newer features require
- * that the HW strips the CRC.
- */
- rctl |= E1000_RCTL_SECRC;
-
- /* disable store bad packets and clear size bits. */
- rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
-
- /* enable LPE to prevent packets larger than max_frame_size */
- rctl |= E1000_RCTL_LPE;
-
- /* disable queue 0 to prevent tail write w/o re-config */
- E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);
-
- /* Attention!!! For SR-IOV PF driver operations you must enable
- * queue drop for all VF and PF queues to prevent head of line blocking
- * if an un-trusted VF does not provide descriptors to hardware.
- */
- if (adapter->vfs_allocated_count) {
- /* set all queue drop enable bits */
- E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
- }
-
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
-}
-
-static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
- int vfn)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 vmolr;
-
-	/* if it isn't the PF, check to see if VFs are enabled and
- * increase the size to support vlan tags */
- if (vfn < adapter->vfs_allocated_count &&
- adapter->vf_data[vfn].vlans_enabled)
- size += VLAN_HLEN;
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- if (vfn >= adapter->vfs_allocated_count) {
- int queue = vfn - adapter->vfs_allocated_count;
- struct igb_vmdq_adapter *vadapter;
-
- vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
- if (vadapter->vlgrp)
- size += VLAN_HLEN;
- }
-#endif
- vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
- vmolr &= ~E1000_VMOLR_RLPML_MASK;
- vmolr |= size | E1000_VMOLR_LPE;
- E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
-
- return 0;
-}
-
-/**
- * igb_rlpml_set - set maximum receive packet size
- * @adapter: board private structure
- *
- * Configure maximum receivable packet size.
- **/
-static void igb_rlpml_set(struct igb_adapter *adapter)
-{
- u32 max_frame_size = adapter->max_frame_size;
- struct e1000_hw *hw = &adapter->hw;
- u16 pf_id = adapter->vfs_allocated_count;
-
- if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
- int i;
- for (i = 0; i < adapter->vmdq_pools; i++)
- igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
- /*
- * If we're in VMDQ or SR-IOV mode, then set global RLPML
- * to our max jumbo frame size, in case we need to enable
- * jumbo frames on one of the rings later.
- * This will not pass over-length frames into the default
- * queue because it's gated by the VMOLR.RLPML.
- */
- max_frame_size = MAX_JUMBO_FRAME_SIZE;
- }
- /* Set VF RLPML for the PF device. */
- if (adapter->vfs_allocated_count)
- igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
-
- E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
-}
-
-static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
- int vfn, bool enable)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 val;
- void __iomem *reg;
-
- if (hw->mac.type < e1000_82576)
- return;
-
- if (hw->mac.type == e1000_i350)
- reg = hw->hw_addr + E1000_DVMOLR(vfn);
- else
- reg = hw->hw_addr + E1000_VMOLR(vfn);
-
- val = readl(reg);
- if (enable)
- val |= E1000_VMOLR_STRVLAN;
- else
- val &= ~(E1000_VMOLR_STRVLAN);
- writel(val, reg);
-}
-
-static inline void igb_set_vmolr(struct igb_adapter *adapter,
- int vfn, bool aupe)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 vmolr;
-
- /*
- * This register exists only on 82576 and newer so if we are older then
- * we should exit and do nothing
- */
- if (hw->mac.type < e1000_82576)
- return;
-
- vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
-
- if (aupe)
- vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
- else
- vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
-
- /* clear all bits that might not be set */
- vmolr &= ~E1000_VMOLR_RSSE;
-
- if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
- vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
-
- vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
- vmolr |= E1000_VMOLR_LPE; /* Accept long packets */
-
- E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
-}
-
-/**
- * igb_configure_rx_ring - Configure a receive ring after Reset
- * @adapter: board private structure
- * @ring: receive ring to be configured
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-void igb_configure_rx_ring(struct igb_adapter *adapter,
- struct igb_ring *ring)
-{
- struct e1000_hw *hw = &adapter->hw;
- u64 rdba = ring->dma;
- int reg_idx = ring->reg_idx;
- u32 srrctl = 0, rxdctl = 0;
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- /*
- * RLPML prevents us from receiving a frame larger than max_frame so
- * it is safe to just set the rx_buffer_len to max_frame without the
-	 * risk of an skb_over_panic().
- */
- ring->rx_buffer_len = max_t(u32, adapter->max_frame_size,
- MAXIMUM_ETHERNET_VLAN_SIZE);
-
-#endif
- /* disable the queue */
- E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);
-
- /* Set DMA base address registers */
- E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
- rdba & 0x00000000ffffffffULL);
- E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
- E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
- ring->count * sizeof(union e1000_adv_rx_desc));
-
- /* initialize head and tail */
- ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
- E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
- writel(0, ring->tail);
-
-	/* reset next-to-use/clean to place SW in sync with hardware */
- ring->next_to_clean = 0;
- ring->next_to_use = 0;
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- ring->next_to_alloc = 0;
-
-#endif
- /* set descriptor configuration */
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
-#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
- srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
- E1000_SRRCTL_BSIZEPKT_SHIFT;
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
- srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
-#ifdef HAVE_PTP_1588_CLOCK
- if (hw->mac.type >= e1000_82580)
- srrctl |= E1000_SRRCTL_TIMESTAMP;
-#endif /* HAVE_PTP_1588_CLOCK */
- /*
- * We should set the drop enable bit if:
- * SR-IOV is enabled
- * or
- * Flow Control is disabled and number of RX queues > 1
- *
- * This allows us to avoid head of line blocking for security
- * and performance reasons.
- */
- if (adapter->vfs_allocated_count ||
- (adapter->num_rx_queues > 1 &&
- (hw->fc.requested_mode == e1000_fc_none ||
- hw->fc.requested_mode == e1000_fc_rx_pause)))
- srrctl |= E1000_SRRCTL_DROP_EN;
-
- E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
-
- /* set filtering for VMDQ pools */
- igb_set_vmolr(adapter, reg_idx & 0x7, true);
-
- rxdctl |= IGB_RX_PTHRESH;
- rxdctl |= IGB_RX_HTHRESH << 8;
- rxdctl |= IGB_RX_WTHRESH << 16;
-
- /* enable receive descriptor fetching */
- rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
- E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
-}
-
-/**
- * igb_configure_rx - Configure receive Unit after Reset
- * @adapter: board private structure
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-static void igb_configure_rx(struct igb_adapter *adapter)
-{
- int i;
-
- /* set UTA to appropriate mode */
- igb_set_uta(adapter);
-
- igb_full_sync_mac_table(adapter);
- /* Setup the HW Rx Head and Tail Descriptor Pointers and
- * the Base and Length of the Rx Descriptor Ring */
- for (i = 0; i < adapter->num_rx_queues; i++)
- igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
-}
-
-/**
- * igb_free_tx_resources - Free Tx Resources per Queue
- * @tx_ring: Tx descriptor ring for a specific queue
- *
- * Free all transmit software resources
- **/
-void igb_free_tx_resources(struct igb_ring *tx_ring)
-{
- igb_clean_tx_ring(tx_ring);
-
- vfree(tx_ring->tx_buffer_info);
- tx_ring->tx_buffer_info = NULL;
-
- /* if not set, then don't free */
- if (!tx_ring->desc)
- return;
-
- dma_free_coherent(tx_ring->dev, tx_ring->size,
- tx_ring->desc, tx_ring->dma);
-
- tx_ring->desc = NULL;
-}
-
-/**
- * igb_free_all_tx_resources - Free Tx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all transmit software resources
- **/
-static void igb_free_all_tx_resources(struct igb_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- igb_free_tx_resources(adapter->tx_ring[i]);
-}
-
-void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
- struct igb_tx_buffer *tx_buffer)
-{
- if (tx_buffer->skb) {
- dev_kfree_skb_any(tx_buffer->skb);
- if (dma_unmap_len(tx_buffer, len))
- dma_unmap_single(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- } else if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- }
- tx_buffer->next_to_watch = NULL;
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
- /* buffer_info must be completely set up in the transmit path */
-}
-
-/**
- * igb_clean_tx_ring - Free Tx Buffers
- * @tx_ring: ring to be cleaned
- **/
-static void igb_clean_tx_ring(struct igb_ring *tx_ring)
-{
- struct igb_tx_buffer *buffer_info;
- unsigned long size;
- u16 i;
-
- if (!tx_ring->tx_buffer_info)
- return;
- /* Free all the Tx ring sk_buffs */
-
- for (i = 0; i < tx_ring->count; i++) {
- buffer_info = &tx_ring->tx_buffer_info[i];
- igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
- }
-
- netdev_tx_reset_queue(txring_txq(tx_ring));
-
- size = sizeof(struct igb_tx_buffer) * tx_ring->count;
- memset(tx_ring->tx_buffer_info, 0, size);
-
- /* Zero out the descriptor ring */
- memset(tx_ring->desc, 0, tx_ring->size);
-
- tx_ring->next_to_use = 0;
- tx_ring->next_to_clean = 0;
-}
-
-/**
- * igb_clean_all_tx_rings - Free Tx Buffers for all queues
- * @adapter: board private structure
- **/
-static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- igb_clean_tx_ring(adapter->tx_ring[i]);
-}
-
-/**
- * igb_free_rx_resources - Free Rx Resources
- * @rx_ring: ring to clean the resources from
- *
- * Free all receive software resources
- **/
-void igb_free_rx_resources(struct igb_ring *rx_ring)
-{
- igb_clean_rx_ring(rx_ring);
-
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
-
- /* if not set, then don't free */
- if (!rx_ring->desc)
- return;
-
- dma_free_coherent(rx_ring->dev, rx_ring->size,
- rx_ring->desc, rx_ring->dma);
-
- rx_ring->desc = NULL;
-}
-
-/**
- * igb_free_all_rx_resources - Free Rx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all receive software resources
- **/
-static void igb_free_all_rx_resources(struct igb_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_rx_queues; i++)
- igb_free_rx_resources(adapter->rx_ring[i]);
-}
-
-/**
- * igb_clean_rx_ring - Free Rx Buffers per Queue
- * @rx_ring: ring to free buffers from
- **/
-void igb_clean_rx_ring(struct igb_ring *rx_ring)
-{
- unsigned long size;
- u16 i;
-
- if (!rx_ring->rx_buffer_info)
- return;
-
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- if (rx_ring->skb)
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
-
-#endif
- /* Free all the Rx ring sk_buffs */
- for (i = 0; i < rx_ring->count; i++) {
- struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- if (buffer_info->dma) {
- dma_unmap_single(rx_ring->dev,
- buffer_info->dma,
- rx_ring->rx_buffer_len,
- DMA_FROM_DEVICE);
- buffer_info->dma = 0;
- }
-
- if (buffer_info->skb) {
- dev_kfree_skb(buffer_info->skb);
- buffer_info->skb = NULL;
- }
-#else
- if (!buffer_info->page)
- continue;
-
- dma_unmap_page(rx_ring->dev,
- buffer_info->dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- __free_page(buffer_info->page);
-
- buffer_info->page = NULL;
-#endif
- }
-
- size = sizeof(struct igb_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_buffer_info, 0, size);
-
- /* Zero out the descriptor ring */
- memset(rx_ring->desc, 0, rx_ring->size);
-
- rx_ring->next_to_alloc = 0;
- rx_ring->next_to_clean = 0;
- rx_ring->next_to_use = 0;
-}
-
-/**
- * igb_clean_all_rx_rings - Free Rx Buffers for all queues
- * @adapter: board private structure
- **/
-static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_rx_queues; i++)
- igb_clean_rx_ring(adapter->rx_ring[i]);
-}
-
-/**
- * igb_set_mac - Change the Ethernet Address of the NIC
- * @netdev: network interface device structure
- * @p: pointer to an address structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int igb_set_mac(struct net_device *netdev, void *p)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- struct sockaddr *addr = p;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
-
- igb_del_mac_filter(adapter, hw->mac.addr,
- adapter->vfs_allocated_count);
- memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
- memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
-
- /* set the correct pool for the new PF MAC address in entry 0 */
- return igb_add_mac_filter(adapter, hw->mac.addr,
- adapter->vfs_allocated_count);
-}
-
-/**
- * igb_write_mc_addr_list - write multicast addresses to MTA
- * @netdev: network interface device structure
- *
- * Writes multicast address list to the MTA hash table.
- * Returns: -ENOMEM on failure
- * 0 on no addresses written
- * X on writing X addresses to MTA
- **/
-int igb_write_mc_addr_list(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- struct netdev_hw_addr *ha;
-#else
- struct dev_mc_list *ha;
-#endif
- u8 *mta_list;
- int i, count;
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- int vm;
-#endif
- count = netdev_mc_count(netdev);
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- for (vm = 1; vm < adapter->vmdq_pools; vm++) {
- if (!adapter->vmdq_netdev[vm])
- break;
- if (!netif_running(adapter->vmdq_netdev[vm]))
- continue;
- count += netdev_mc_count(adapter->vmdq_netdev[vm]);
- }
-#endif
-
- if (!count) {
- e1000_update_mc_addr_list(hw, NULL, 0);
- return 0;
- }
- mta_list = kzalloc(count * 6, GFP_ATOMIC);
- if (!mta_list)
- return -ENOMEM;
-
- /* The shared function expects a packed array of only addresses. */
- i = 0;
- netdev_for_each_mc_addr(ha, netdev)
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
-#else
- memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
-#endif
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- for (vm = 1; vm < adapter->vmdq_pools; vm++) {
- if (!adapter->vmdq_netdev[vm])
- break;
- if (!netif_running(adapter->vmdq_netdev[vm]) ||
- !netdev_mc_count(adapter->vmdq_netdev[vm]))
- continue;
- netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- memcpy(mta_list + (i++ * ETH_ALEN),
- ha->addr, ETH_ALEN);
-#else
- memcpy(mta_list + (i++ * ETH_ALEN),
- ha->dmi_addr, ETH_ALEN);
-#endif
- }
-#endif
- e1000_update_mc_addr_list(hw, mta_list, i);
- kfree(mta_list);
-
- return count;
-}
-
-void igb_rar_set(struct igb_adapter *adapter, u32 index)
-{
- u32 rar_low, rar_high;
- struct e1000_hw *hw = &adapter->hw;
- u8 *addr = adapter->mac_table[index].addr;
- /* HW expects these in little endian so we reverse the byte order
- * from network order (big endian) to little endian
- */
- rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
- ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
- rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
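-	/*
-	 * e.g. 00:1b:21:aa:bb:cc gives rar_low = 0xaa211b00 and
-	 * rar_high = 0x0000ccbb before the valid/pool bits are added.
-	 */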
-
- /* Indicate to hardware the Address is Valid. */
- if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
- rar_high |= E1000_RAH_AV;
-
- if (hw->mac.type == e1000_82575)
- rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
- else
- rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;
-
- E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
- E1000_WRITE_FLUSH(hw);
- E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
- E1000_WRITE_FLUSH(hw);
-}
-
-void igb_full_sync_mac_table(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
- for (i = 0; i < hw->mac.rar_entry_count; i++) {
- igb_rar_set(adapter, i);
- }
-}
-
-void igb_sync_mac_table(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
- for (i = 0; i < hw->mac.rar_entry_count; i++) {
- if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
- igb_rar_set(adapter, i);
- adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
- }
-}
-
-int igb_available_rars(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i, count = 0;
-
- for (i = 0; i < hw->mac.rar_entry_count; i++) {
- if (adapter->mac_table[i].state == 0)
- count++;
- }
- return count;
-}
-
-#ifdef HAVE_SET_RX_MODE
-/**
- * igb_write_uc_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- * 0 on no addresses written
- * X on writing X addresses to the RAR table
- **/
-static int igb_write_uc_addr_list(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- unsigned int vfn = adapter->vfs_allocated_count;
- int count = 0;
-
- /* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > igb_available_rars(adapter))
- return -ENOMEM;
- if (!netdev_uc_empty(netdev)) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- struct netdev_hw_addr *ha;
-#else
- struct dev_mc_list *ha;
-#endif
- netdev_for_each_uc_addr(ha, netdev) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- igb_del_mac_filter(adapter, ha->addr, vfn);
- igb_add_mac_filter(adapter, ha->addr, vfn);
-#else
- igb_del_mac_filter(adapter, ha->da_addr, vfn);
- igb_add_mac_filter(adapter, ha->da_addr, vfn);
-#endif
- count++;
- }
- }
- return count;
-}
-
-#endif /* HAVE_SET_RX_MODE */
-/**
- * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
- * @netdev: network interface device structure
- *
- * The set_rx_mode entry point is called whenever the unicast or multicast
- * address lists or the network interface flags are updated. This routine is
- * responsible for configuring the hardware for proper unicast, multicast,
- * promiscuous mode, and all-multi behavior.
- **/
-static void igb_set_rx_mode(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- unsigned int vfn = adapter->vfs_allocated_count;
- u32 rctl, vmolr = 0;
- int count;
-
- /* Check for Promiscuous and All Multicast modes */
- rctl = E1000_READ_REG(hw, E1000_RCTL);
-
-	/* clear the affected bits */
- rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
-
- if (netdev->flags & IFF_PROMISC) {
- rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
- vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
- /* retain VLAN HW filtering if in VT mode */
- if (adapter->vfs_allocated_count || adapter->vmdq_pools)
- rctl |= E1000_RCTL_VFE;
- } else {
- if (netdev->flags & IFF_ALLMULTI) {
- rctl |= E1000_RCTL_MPE;
- vmolr |= E1000_VMOLR_MPME;
- } else {
- /*
- * Write addresses to the MTA, if the attempt fails
- * then we should just turn on promiscuous mode so
- * that we can at least receive multicast traffic
- */
- count = igb_write_mc_addr_list(netdev);
- if (count < 0) {
- rctl |= E1000_RCTL_MPE;
- vmolr |= E1000_VMOLR_MPME;
- } else if (count) {
- vmolr |= E1000_VMOLR_ROMPE;
- }
- }
-#ifdef HAVE_SET_RX_MODE
- /*
- * Write addresses to available RAR registers, if there is not
- * sufficient space to store all the addresses then enable
- * unicast promiscuous mode
- */
- count = igb_write_uc_addr_list(netdev);
- if (count < 0) {
- rctl |= E1000_RCTL_UPE;
- vmolr |= E1000_VMOLR_ROPE;
- }
-#endif /* HAVE_SET_RX_MODE */
- rctl |= E1000_RCTL_VFE;
- }
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
-
- /*
- * In order to support SR-IOV and eventually VMDq it is necessary to set
- * the VMOLR to enable the appropriate modes. Without this workaround
- * we will have issues with VLAN tag stripping not being done for frames
- * that are only arriving because we are the default pool
- */
- if (hw->mac.type < e1000_82576)
- return;
-
- vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
- ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
- E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
- igb_restore_vf_multicasts(adapter);
-}
-
-static void igb_check_wvbr(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 wvbr = 0;
-
- switch (hw->mac.type) {
- case e1000_82576:
- case e1000_i350:
- if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
- return;
- break;
- default:
- break;
- }
-
- adapter->wvbr |= wvbr;
-}
-
-#define IGB_STAGGERED_QUEUE_OFFSET 8
-
-static void igb_spoof_check(struct igb_adapter *adapter)
-{
- int j;
-
- if (!adapter->wvbr)
- return;
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- for (j = 0; j < adapter->vfs_allocated_count; j++) {
- if (adapter->wvbr & (1 << j) ||
- adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
- DPRINTK(DRV, WARNING,
- "Spoof event(s) detected on VF %d\n", j);
- adapter->wvbr &=
- ~((1 << j) |
- (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
- }
- }
- break;
- case e1000_i350:
- for (j = 0; j < adapter->vfs_allocated_count; j++) {
- if (adapter->wvbr & (1 << j)) {
- DPRINTK(DRV, WARNING,
- "Spoof event(s) detected on VF %d\n", j);
- adapter->wvbr &= ~(1 << j);
- }
- }
- break;
- default:
- break;
- }
-}
-
-/* Need to wait a few seconds after link up to get diagnostic information from
- * the phy */
-#ifdef HAVE_TIMER_SETUP
-static void igb_update_phy_info(struct timer_list *t)
-{
- struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
-#else
-static void igb_update_phy_info(unsigned long data)
-{
- struct igb_adapter *adapter = (struct igb_adapter *) data;
-#endif
- e1000_get_phy_info(&adapter->hw);
-}
-
-/**
- * igb_has_link - check shared code for link and determine up/down
- * @adapter: pointer to driver private info
- **/
-bool igb_has_link(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- bool link_active = FALSE;
-
- /* get_link_status is set on LSC (link status) interrupt or
-	 * rx sequence error interrupt. get_link_status will remain
-	 * set until e1000_check_for_link establishes link
- * for copper adapters ONLY
- */
- switch (hw->phy.media_type) {
- case e1000_media_type_copper:
- if (!hw->mac.get_link_status)
- return true;
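-		/* fall through */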
- case e1000_media_type_internal_serdes:
- e1000_check_for_link(hw);
- link_active = !hw->mac.get_link_status;
- break;
- case e1000_media_type_unknown:
- default:
- break;
- }
-
- if (((hw->mac.type == e1000_i210) ||
- (hw->mac.type == e1000_i211)) &&
- (hw->phy.id == I210_I_PHY_ID)) {
- if (!netif_carrier_ok(adapter->netdev)) {
- adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
- } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
- adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
- adapter->link_check_timeout = jiffies;
- }
- }
-
- return link_active;
-}
-
-/**
- * igb_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-#ifdef HAVE_TIMER_SETUP
-static void igb_watchdog(struct timer_list *t)
-{
- struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-#else
-static void igb_watchdog(unsigned long data)
-{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
-#endif
- /* Do the rest outside of interrupt context */
- schedule_work(&adapter->watchdog_task);
-}
-
-static void igb_watchdog_task(struct work_struct *work)
-{
- struct igb_adapter *adapter = container_of(work,
- struct igb_adapter,
- watchdog_task);
- struct e1000_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- u32 link;
- int i;
- u32 thstat, ctrl_ext;
- u32 connsw;
-
- link = igb_has_link(adapter);
- /* Force link down if we have fiber to swap to */
- if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
- if (hw->phy.media_type == e1000_media_type_copper) {
- connsw = E1000_READ_REG(hw, E1000_CONNSW);
- if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
- link = 0;
- }
- }
-
- if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
- if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
- adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
- else
- link = FALSE;
- }
-
- if (link) {
- /* Perform a reset if the media type changed. */
- if (hw->dev_spec._82575.media_changed) {
- hw->dev_spec._82575.media_changed = false;
- adapter->flags |= IGB_FLAG_MEDIA_RESET;
- igb_reset(adapter);
- }
-
- /* Cancel scheduled suspend requests. */
- pm_runtime_resume(netdev->dev.parent);
-
- if (!netif_carrier_ok(netdev)) {
- u32 ctrl;
- e1000_get_speed_and_duplex(hw,
- &adapter->link_speed,
- &adapter->link_duplex);
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
-			/* Link status message must follow this format */
- printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
- "Flow Control: %s\n",
- netdev->name,
- adapter->link_speed,
- adapter->link_duplex == FULL_DUPLEX ?
- "Full Duplex" : "Half Duplex",
- ((ctrl & E1000_CTRL_TFCE) &&
- (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
- ((ctrl & E1000_CTRL_RFCE) ? "RX" :
- ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
- /* adjust timeout factor according to speed/duplex */
- adapter->tx_timeout_factor = 1;
- switch (adapter->link_speed) {
- case SPEED_10:
- adapter->tx_timeout_factor = 14;
- break;
- case SPEED_100:
- /* maybe add some timeout factor ? */
- break;
- default:
- break;
- }
-
- netif_carrier_on(netdev);
- netif_tx_wake_all_queues(netdev);
-
- igb_ping_all_vfs(adapter);
-#ifdef IFLA_VF_MAX
- igb_check_vf_rate_limit(adapter);
-#endif /* IFLA_VF_MAX */
-
- /* link state has changed, schedule phy info update */
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->phy_info_timer,
- round_jiffies(jiffies + 2 * HZ));
- }
- } else {
- if (netif_carrier_ok(netdev)) {
- adapter->link_speed = 0;
- adapter->link_duplex = 0;
- /* check for thermal sensor event on i350 */
- if (hw->mac.type == e1000_i350) {
- thstat = E1000_READ_REG(hw, E1000_THSTAT);
- ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
- if ((hw->phy.media_type ==
- e1000_media_type_copper) &&
- !(ctrl_ext &
- E1000_CTRL_EXT_LINK_MODE_SGMII)) {
- if (thstat & E1000_THSTAT_PWR_DOWN) {
- printk(KERN_ERR "igb: %s The "
- "network adapter was stopped "
- "because it overheated.\n",
- netdev->name);
- }
- if (thstat & E1000_THSTAT_LINK_THROTTLE) {
- printk(KERN_INFO
- "igb: %s The network "
- "adapter supported "
- "link speed "
- "was downshifted "
- "because it "
- "overheated.\n",
- netdev->name);
- }
- }
- }
-
-			/* Link status message must follow this format */
- printk(KERN_INFO "igb: %s NIC Link is Down\n",
- netdev->name);
- netif_carrier_off(netdev);
- netif_tx_stop_all_queues(netdev);
-
- igb_ping_all_vfs(adapter);
-
- /* link state has changed, schedule phy info update */
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->phy_info_timer,
- round_jiffies(jiffies + 2 * HZ));
- /* link is down, time to check for alternate media */
- if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
- igb_check_swap_media(adapter);
- if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
- schedule_work(&adapter->reset_task);
- /* return immediately */
- return;
- }
- }
- pm_schedule_suspend(netdev->dev.parent,
- MSEC_PER_SEC * 5);
-
- /* also check for alternate media here */
- } else if (!netif_carrier_ok(netdev) &&
- (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
- hw->mac.ops.power_up_serdes(hw);
- igb_check_swap_media(adapter);
- if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
- schedule_work(&adapter->reset_task);
- /* return immediately */
- return;
- }
- }
- }
-
- igb_update_stats(adapter);
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- struct igb_ring *tx_ring = adapter->tx_ring[i];
- if (!netif_carrier_ok(netdev)) {
- /* We've lost link, so the controller stops DMA,
- * but we've got queued Tx work that's never going
- * to get done, so reset controller to flush Tx.
- * (Do the reset outside of interrupt context). */
- if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
- adapter->tx_timeout_count++;
- schedule_work(&adapter->reset_task);
- /* return immediately since reset is imminent */
- return;
- }
- }
-
- /* Force detection of hung controller every watchdog period */
- set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
- }
-
- /* Cause software interrupt to ensure rx ring is cleaned */
- if (adapter->msix_entries) {
- u32 eics = 0;
- for (i = 0; i < adapter->num_q_vectors; i++)
- eics |= adapter->q_vector[i]->eims_value;
- E1000_WRITE_REG(hw, E1000_EICS, eics);
- } else {
- E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
- }
-
- igb_spoof_check(adapter);
-
- /* Reset the timer */
- if (!test_bit(__IGB_DOWN, &adapter->state)) {
- if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
- mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + HZ));
- else
- mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + 2 * HZ));
- }
-}
-
-static void igb_dma_err_task(struct work_struct *work)
-{
- struct igb_adapter *adapter = container_of(work,
- struct igb_adapter,
- dma_err_task);
- int vf;
- struct e1000_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- u32 hgptc;
- u32 ciaa, ciad;
-
- hgptc = E1000_READ_REG(hw, E1000_HGPTC);
- if (hgptc) /* If incrementing then no need for the check below */
- goto dma_timer_reset;
- /*
- * Check to see if a bad DMA write target from an errant or
- * malicious VF has caused a PCIe error. If so then we can
- * issue a VFLR to the offending VF(s) and then resume without
- * requesting a full slot reset.
- */
-
- for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
- ciaa = (vf << 16) | 0x80000000;
- /* 32 bit read so align, we really want status at offset 6 */
- ciaa |= PCI_COMMAND;
- E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
- ciad = E1000_READ_REG(hw, E1000_CIAD);
- ciaa &= 0x7FFFFFFF;
- /* disable debug mode asap after reading data */
- E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
- /* Get the upper 16 bits which will be the PCI status reg */
- ciad >>= 16;
- if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
- PCI_STATUS_REC_TARGET_ABORT |
- PCI_STATUS_SIG_SYSTEM_ERROR)) {
- netdev_err(netdev, "VF %d suffered error\n", vf);
- /* Issue VFLR */
- ciaa = (vf << 16) | 0x80000000;
- ciaa |= 0xA8;
- E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
- ciad = 0x00008000; /* VFLR */
- E1000_WRITE_REG(hw, E1000_CIAD, ciad);
- ciaa &= 0x7FFFFFFF;
- E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
- }
- }
-dma_timer_reset:
- /* Reset the timer */
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->dma_err_timer,
- round_jiffies(jiffies + HZ / 10));
-}
-
-/**
- * igb_dma_err_timer - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-#ifdef HAVE_TIMER_SETUP
-static void igb_dma_err_timer(struct timer_list *t)
-{
- struct igb_adapter *adapter = from_timer(adapter, t, dma_err_timer);
-#else
-static void igb_dma_err_timer(unsigned long data)
-{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
-#endif
- /* Do the rest outside of interrupt context */
- schedule_work(&adapter->dma_err_task);
-}
-
-enum latency_range {
- lowest_latency = 0,
- low_latency = 1,
- bulk_latency = 2,
- latency_invalid = 255
-};
-
-/**
- * igb_update_ring_itr - update the dynamic ITR value based on packet size
- *
- * Stores a new ITR value based strictly on packet size. This
- * algorithm is less sophisticated than that used in igb_update_itr,
- * due to the difficulty of synchronizing statistics across multiple
- * receive rings. The divisors and thresholds used by this function
- * were determined based on theoretical maximum wire speed and testing
- * data, in order to minimize response time while increasing bulk
- * throughput.
- * This functionality is controlled by the InterruptThrottleRate module
- * parameter (see igb_param.c)
- * NOTE: This function is called only when operating in a multiqueue
- * receive environment.
- * @q_vector: pointer to q_vector
- **/
-static void igb_update_ring_itr(struct igb_q_vector *q_vector)
-{
- int new_val = q_vector->itr_val;
- int avg_wire_size = 0;
- struct igb_adapter *adapter = q_vector->adapter;
- unsigned int packets;
-
- /* For non-gigabit speeds, just fix the interrupt rate at 4000
- * ints/sec - ITR timer value of 120 ticks.
- */
- switch (adapter->link_speed) {
- case SPEED_10:
- case SPEED_100:
- new_val = IGB_4K_ITR;
- goto set_itr_val;
- default:
- break;
- }
-
- packets = q_vector->rx.total_packets;
- if (packets)
- avg_wire_size = q_vector->rx.total_bytes / packets;
-
- packets = q_vector->tx.total_packets;
- if (packets)
- avg_wire_size = max_t(u32, avg_wire_size,
- q_vector->tx.total_bytes / packets);
-
- /* if avg_wire_size isn't set no work was done */
- if (!avg_wire_size)
- goto clear_counts;
-
- /* Add 24 bytes to size to account for CRC, preamble, and gap */
- avg_wire_size += 24;
-
- /* Don't starve jumbo frames */
- avg_wire_size = min(avg_wire_size, 3000);
-
- /* Give a little boost to mid-size frames */
- if ((avg_wire_size > 300) && (avg_wire_size < 1200))
- new_val = avg_wire_size / 3;
- else
- new_val = avg_wire_size / 2;
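-	/*
-	 * e.g. a 1000 byte average frame becomes 1024 with overhead and falls
-	 * in the mid-size band, giving new_val = 1024 / 3 = 341; 64 byte
-	 * frames give (64 + 24) / 2 = 44, which the conservative mode check
-	 * below raises to IGB_20K_ITR.
-	 */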
-
- /* conservative mode (itr 3) eliminates the lowest_latency setting */
- if (new_val < IGB_20K_ITR &&
- ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
- (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
- new_val = IGB_20K_ITR;
-
-set_itr_val:
- if (new_val != q_vector->itr_val) {
- q_vector->itr_val = new_val;
- q_vector->set_itr = 1;
- }
-clear_counts:
- q_vector->rx.total_bytes = 0;
- q_vector->rx.total_packets = 0;
- q_vector->tx.total_bytes = 0;
- q_vector->tx.total_packets = 0;
-}
-
-/**
- * igb_update_itr - update the dynamic ITR value based on statistics
- * Stores a new ITR value based on packets and byte
- * counts during the last interrupt. The advantage of per interrupt
- * computation is faster updates and more accurate ITR for the current
- * traffic pattern. Constants in this function were computed
- * based on theoretical maximum wire speed and thresholds were set based
- * on testing data as well as attempting to minimize response time
- * while increasing bulk throughput.
- * This functionality is controlled by the InterruptThrottleRate module
- * parameter (see igb_param.c)
- * NOTE: These calculations are only valid when operating in a single-
- * queue environment.
- * @q_vector: pointer to q_vector
- * @ring_container: ring info to update the itr for
- **/
-static void igb_update_itr(struct igb_q_vector *q_vector,
- struct igb_ring_container *ring_container)
-{
- unsigned int packets = ring_container->total_packets;
- unsigned int bytes = ring_container->total_bytes;
- u8 itrval = ring_container->itr;
-
- /* no packets, exit with status unchanged */
- if (packets == 0)
- return;
-
- switch (itrval) {
- case lowest_latency:
- /* handle TSO and jumbo frames */
- if (bytes/packets > 8000)
- itrval = bulk_latency;
- else if ((packets < 5) && (bytes > 512))
- itrval = low_latency;
- break;
- case low_latency: /* 50 usec aka 20000 ints/s */
- if (bytes > 10000) {
- /* this if handles the TSO accounting */
- if (bytes/packets > 8000) {
- itrval = bulk_latency;
- } else if ((packets < 10) || ((bytes/packets) > 1200)) {
- itrval = bulk_latency;
- } else if (packets > 35) {
- itrval = lowest_latency;
- }
- } else if (bytes/packets > 2000) {
- itrval = bulk_latency;
- } else if (packets <= 2 && bytes < 512) {
- itrval = lowest_latency;
- }
- break;
- case bulk_latency: /* 250 usec aka 4000 ints/s */
- if (bytes > 25000) {
- if (packets > 35)
- itrval = low_latency;
- } else if (bytes < 1500) {
- itrval = low_latency;
- }
- break;
- }
-
- /* clear work counters since we have the values we need */
- ring_container->total_bytes = 0;
- ring_container->total_packets = 0;
-
- /* write updated itr to ring container */
- ring_container->itr = itrval;
-}
-
-static void igb_set_itr(struct igb_q_vector *q_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- u32 new_itr = q_vector->itr_val;
- u8 current_itr = 0;
-
- /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
- switch (adapter->link_speed) {
- case SPEED_10:
- case SPEED_100:
- current_itr = 0;
- new_itr = IGB_4K_ITR;
- goto set_itr_now;
- default:
- break;
- }
-
- igb_update_itr(q_vector, &q_vector->tx);
- igb_update_itr(q_vector, &q_vector->rx);
-
- current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
-
- /* conservative mode (itr 3) eliminates the lowest_latency setting */
- if (current_itr == lowest_latency &&
- ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
- (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
- current_itr = low_latency;
-
- switch (current_itr) {
- /* counts and packets in update_itr are dependent on these numbers */
- case lowest_latency:
- new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
- break;
- case low_latency:
- new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
- break;
- case bulk_latency:
- new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
- break;
- default:
- break;
- }
-
-set_itr_now:
- if (new_itr != q_vector->itr_val) {
- /* this attempts to bias the interrupt rate towards Bulk
- * by adding intermediate steps when interrupt rate is
- * increasing */
- new_itr = new_itr > q_vector->itr_val ?
- max((new_itr * q_vector->itr_val) /
- (new_itr + (q_vector->itr_val >> 2)),
- new_itr) :
- new_itr;
- /* Don't write the value here; it resets the adapter's
- * internal timer, and causes us to delay far longer than
- * we should between interrupts. Instead, we write the ITR
- * value at the beginning of the next interrupt so the timing
- * ends up being correct.
- */
- q_vector->itr_val = new_itr;
- q_vector->set_itr = 1;
- }
-}
-
-void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
- u32 type_tucmd, u32 mss_l4len_idx)
-{
- struct e1000_adv_tx_context_desc *context_desc;
- u16 i = tx_ring->next_to_use;
-
- context_desc = IGB_TX_CTXTDESC(tx_ring, i);
-
- i++;
- tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
-
- /* set bits to identify this as an advanced context descriptor */
- type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
-
- /* For 82575, context index must be unique per ring. */
- if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
- mss_l4len_idx |= tx_ring->reg_idx << 4;
-
- context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
- context_desc->seqnum_seed = 0;
- context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
- context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
-}
-
-static int igb_tso(struct igb_ring *tx_ring,
- struct igb_tx_buffer *first,
- u8 *hdr_len)
-{
-#ifdef NETIF_F_TSO
- struct sk_buff *skb = first->skb;
- u32 vlan_macip_lens, type_tucmd;
- u32 mss_l4len_idx, l4len;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return 0;
-
- if (!skb_is_gso(skb))
-#endif /* NETIF_F_TSO */
- return 0;
-#ifdef NETIF_F_TSO
-
- if (skb_header_cloned(skb)) {
- int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (err)
- return err;
- }
-
- /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
- type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
-
- if (first->protocol == __constant_htons(ETH_P_IP)) {
- struct iphdr *iph = ip_hdr(skb);
- iph->tot_len = 0;
- iph->check = 0;
- tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
- iph->daddr, 0,
- IPPROTO_TCP,
- 0);
- type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
- first->tx_flags |= IGB_TX_FLAGS_TSO |
- IGB_TX_FLAGS_CSUM |
- IGB_TX_FLAGS_IPV4;
-#ifdef NETIF_F_TSO6
- } else if (skb_is_gso_v6(skb)) {
- ipv6_hdr(skb)->payload_len = 0;
- tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- 0, IPPROTO_TCP, 0);
- first->tx_flags |= IGB_TX_FLAGS_TSO |
- IGB_TX_FLAGS_CSUM;
-#endif
- }
-
- /* compute header lengths */
- l4len = tcp_hdrlen(skb);
- *hdr_len = skb_transport_offset(skb) + l4len;
-
- /* update gso size and bytecount with header size */
- first->gso_segs = skb_shinfo(skb)->gso_segs;
- first->bytecount += (first->gso_segs - 1) * *hdr_len;
-
- /* MSS L4LEN IDX */
- mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
- mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
-
- /* VLAN MACLEN IPLEN */
- vlan_macip_lens = skb_network_header_len(skb);
- vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
- vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
-
- igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
-
- return 1;
-#endif /* NETIF_F_TSO */
-}
-
-static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
-{
- struct sk_buff *skb = first->skb;
- u32 vlan_macip_lens = 0;
- u32 mss_l4len_idx = 0;
- u32 type_tucmd = 0;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
- return;
- } else {
- u8 nexthdr = 0;
- switch (first->protocol) {
- case __constant_htons(ETH_P_IP):
- vlan_macip_lens |= skb_network_header_len(skb);
- type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
- nexthdr = ip_hdr(skb)->protocol;
- break;
-#ifdef NETIF_F_IPV6_CSUM
- case __constant_htons(ETH_P_IPV6):
- vlan_macip_lens |= skb_network_header_len(skb);
- nexthdr = ipv6_hdr(skb)->nexthdr;
- break;
-#endif
- default:
- if (unlikely(net_ratelimit())) {
- dev_warn(tx_ring->dev,
- "partial checksum but proto=%x!\n",
- first->protocol);
- }
- break;
- }
-
- switch (nexthdr) {
- case IPPROTO_TCP:
- type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
- mss_l4len_idx = tcp_hdrlen(skb) <<
- E1000_ADVTXD_L4LEN_SHIFT;
- break;
-#ifdef HAVE_SCTP
- case IPPROTO_SCTP:
- type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
- mss_l4len_idx = sizeof(struct sctphdr) <<
- E1000_ADVTXD_L4LEN_SHIFT;
- break;
-#endif
- case IPPROTO_UDP:
- mss_l4len_idx = sizeof(struct udphdr) <<
- E1000_ADVTXD_L4LEN_SHIFT;
- break;
- default:
- if (unlikely(net_ratelimit())) {
- dev_warn(tx_ring->dev,
- "partial checksum but l4 proto=%x!\n",
- nexthdr);
- }
- break;
- }
-
- /* update TX checksum flag */
- first->tx_flags |= IGB_TX_FLAGS_CSUM;
- }
-
- vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
- vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
-
- igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
-}
-
-#define IGB_SET_FLAG(_input, _flag, _result) \
- ((_flag <= _result) ? \
- ((u32)(_input & _flag) * (_result / _flag)) : \
- ((u32)(_input & _flag) / (_flag / _result)))
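-
-/* Editor's note (illustration, not part of the original driver source):
- * IGB_SET_FLAG moves a single flag bit from _input to the bit position of
- * _result without branching.  Because both constants are powers of two,
- * multiplying or dividing by their ratio is just a shift: for example,
- * with _flag = IGB_TX_FLAGS_VLAN and _result = E1000_ADVTXD_DCMD_VLE (as
- * used in igb_tx_cmd_type() below), the masked VLAN request bit lands
- * exactly on the descriptor's VLE command bit.  The same trick is reused
- * for the TSO and timestamp flags.
- */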
-
-static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
-{
- /* set type for advanced descriptor with frame checksum insertion */
- u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
- E1000_ADVTXD_DCMD_DEXT |
- E1000_ADVTXD_DCMD_IFCS;
-
- /* set HW vlan bit if vlan is present */
- cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
- (E1000_ADVTXD_DCMD_VLE));
-
- /* set segmentation bits for TSO */
- cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
- (E1000_ADVTXD_DCMD_TSE));
-
- /* set timestamp bit if present */
- cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
- (E1000_ADVTXD_MAC_TSTAMP));
-
- return cmd_type;
-}
-
-static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
- union e1000_adv_tx_desc *tx_desc,
- u32 tx_flags, unsigned int paylen)
-{
- u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
-
- /* 82575 requires a unique index per ring */
- if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
- olinfo_status |= tx_ring->reg_idx << 4;
-
- /* insert L4 checksum */
- olinfo_status |= IGB_SET_FLAG(tx_flags,
- IGB_TX_FLAGS_CSUM,
- (E1000_TXD_POPTS_TXSM << 8));
-
- /* insert IPv4 checksum */
- olinfo_status |= IGB_SET_FLAG(tx_flags,
- IGB_TX_FLAGS_IPV4,
- (E1000_TXD_POPTS_IXSM << 8));
-
- tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
-}
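-
-/* Editor's note (illustration, not in the original source): olinfo_status
- * is assembled without branches from values already known for the packet:
- * the payload length shifted by E1000_ADVTXD_PAYLEN_SHIFT, the per-ring
- * context index (reg_idx << 4, needed on 82575 only) and the POPTS
- * checksum-insertion bits (TXSM for L4, IXSM for IPv4) mapped into place
- * with the IGB_SET_FLAG helper defined above.
- */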
-
-static void igb_tx_map(struct igb_ring *tx_ring,
- struct igb_tx_buffer *first,
- const u8 hdr_len)
-{
- struct sk_buff *skb = first->skb;
- struct igb_tx_buffer *tx_buffer;
- union e1000_adv_tx_desc *tx_desc;
- struct skb_frag_struct *frag;
- dma_addr_t dma;
- unsigned int data_len, size;
- u32 tx_flags = first->tx_flags;
- u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
- u16 i = tx_ring->next_to_use;
-
- tx_desc = IGB_TX_DESC(tx_ring, i);
-
- igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
-
- size = skb_headlen(skb);
- data_len = skb->data_len;
-
- dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
-
- tx_buffer = first;
-
- for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
- if (dma_mapping_error(tx_ring->dev, dma))
- goto dma_error;
-
- /* record length, and DMA address */
- dma_unmap_len_set(tx_buffer, len, size);
- dma_unmap_addr_set(tx_buffer, dma, dma);
-
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
-
- while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
- tx_desc->read.cmd_type_len =
- cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
-
- i++;
- tx_desc++;
- if (i == tx_ring->count) {
- tx_desc = IGB_TX_DESC(tx_ring, 0);
- i = 0;
- }
- tx_desc->read.olinfo_status = 0;
-
- dma += IGB_MAX_DATA_PER_TXD;
- size -= IGB_MAX_DATA_PER_TXD;
-
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
- }
-
- if (likely(!data_len))
- break;
-
- tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
-
- i++;
- tx_desc++;
- if (i == tx_ring->count) {
- tx_desc = IGB_TX_DESC(tx_ring, 0);
- i = 0;
- }
- tx_desc->read.olinfo_status = 0;
-
- size = skb_frag_size(frag);
- data_len -= size;
-
- dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
- size, DMA_TO_DEVICE);
-
- tx_buffer = &tx_ring->tx_buffer_info[i];
- }
-
- /* write last descriptor with RS and EOP bits */
- cmd_type |= size | IGB_TXD_DCMD;
- tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
-
- netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
- /* set the timestamp */
- first->time_stamp = jiffies;
-
- /*
- * Force memory writes to complete before letting h/w know there
- * are new descriptors to fetch. (Only applicable for weak-ordered
- * memory model archs, such as IA-64).
- *
- * We also need this memory barrier to make certain all of the
- * status bits have been updated before next_to_watch is written.
- */
- wmb();
-
- /* set next_to_watch value indicating a packet is present */
- first->next_to_watch = tx_desc;
-
- i++;
- if (i == tx_ring->count)
- i = 0;
-
- tx_ring->next_to_use = i;
-
- writel(i, tx_ring->tail);
-
- /* we need this if more than one processor can write to our tail
-	 * at a time; it synchronizes IO on IA64/Altix systems */
- mmiowb();
-
- return;
-
-dma_error:
- dev_err(tx_ring->dev, "TX DMA map failed\n");
-
- /* clear dma mappings for failed tx_buffer_info map */
- for (;;) {
- tx_buffer = &tx_ring->tx_buffer_info[i];
- igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
- if (tx_buffer == first)
- break;
- if (i == 0)
- i = tx_ring->count;
- i--;
- }
-
- tx_ring->next_to_use = i;
-}
-
-static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
-{
- struct net_device *netdev = netdev_ring(tx_ring);
-
- if (netif_is_multiqueue(netdev))
- netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
- else
- netif_stop_queue(netdev);
-
- /* Herbert's original patch had:
- * smp_mb__after_netif_stop_queue();
- * but since that doesn't exist yet, just open code it. */
- smp_mb();
-
-	/* We need to check again in case another CPU has just
- * made room available. */
- if (igb_desc_unused(tx_ring) < size)
- return -EBUSY;
-
- /* A reprieve! */
- if (netif_is_multiqueue(netdev))
- netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
- else
- netif_wake_queue(netdev);
-
- tx_ring->tx_stats.restart_queue++;
-
- return 0;
-}
-
-static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
-{
- if (igb_desc_unused(tx_ring) >= size)
- return 0;
- return __igb_maybe_stop_tx(tx_ring, size);
-}
-
-netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
- struct igb_ring *tx_ring)
-{
- struct igb_tx_buffer *first;
- int tso;
- u32 tx_flags = 0;
-#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
- unsigned short f;
-#endif
- u16 count = TXD_USE_COUNT(skb_headlen(skb));
- __be16 protocol = vlan_get_protocol(skb);
- u8 hdr_len = 0;
-
- /*
- * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
- * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
- * + 2 desc gap to keep tail from touching head,
- * + 1 desc for context descriptor,
- * otherwise try next time
- */
-#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
- for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-#else
- count += skb_shinfo(skb)->nr_frags;
-#endif
- if (igb_maybe_stop_tx(tx_ring, count + 3)) {
- /* this is a hard error */
- return NETDEV_TX_BUSY;
- }
-
- /* record the location of the first descriptor for this packet */
- first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
- first->skb = skb;
- first->bytecount = skb->len;
- first->gso_segs = 1;
-
- skb_tx_timestamp(skb);
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
- if (!adapter->ptp_tx_skb) {
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- tx_flags |= IGB_TX_FLAGS_TSTAMP;
-
- adapter->ptp_tx_skb = skb_get(skb);
- adapter->ptp_tx_start = jiffies;
- if (adapter->hw.mac.type == e1000_82576)
- schedule_work(&adapter->ptp_tx_work);
- }
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- if (vlan_tx_tag_present(skb)) {
- tx_flags |= IGB_TX_FLAGS_VLAN;
- tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
- }
-
- /* record initial flags and protocol */
- first->tx_flags = tx_flags;
- first->protocol = protocol;
-
- tso = igb_tso(tx_ring, first, &hdr_len);
- if (tso < 0)
- goto out_drop;
- else if (!tso)
- igb_tx_csum(tx_ring, first);
-
- igb_tx_map(tx_ring, first, hdr_len);
-
-#ifndef HAVE_TRANS_START_IN_QUEUE
- netdev_ring(tx_ring)->trans_start = jiffies;
-
-#endif
- /* Make sure there is space in the ring for the next send. */
- igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
-
- return NETDEV_TX_OK;
-
-out_drop:
- igb_unmap_and_free_tx_resource(tx_ring, first);
-
- return NETDEV_TX_OK;
-}
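-
-/* Worked example (editor's illustration, not from the original source):
- * on a system where PAGE_SIZE does not exceed IGB_MAX_DATA_PER_TXD, a
- * packet with a short linear header and three page fragments needs
- * count = 1 (headlen) + 3 (frags) = 4 data descriptors, so
- * igb_maybe_stop_tx() above is asked for count + 3 = 7 free entries:
- * the 4 data descriptors, 1 context descriptor and the 2-descriptor gap
- * that keeps the tail from touching the head.
- */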
-
-#ifdef HAVE_TX_MQ
-static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
- struct sk_buff *skb)
-{
- unsigned int r_idx = skb->queue_mapping;
-
- if (r_idx >= adapter->num_tx_queues)
- r_idx = r_idx % adapter->num_tx_queues;
-
- return adapter->tx_ring[r_idx];
-}
-#else
-#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
-#endif
-
-static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
- struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
-
- if (skb->len <= 0) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
-
- /*
- * The minimum packet size with TCTL.PSP set is 17 so pad the skb
- * in order to meet this minimum size requirement.
- */
- if (skb->len < 17) {
- if (skb_padto(skb, 17))
- return NETDEV_TX_OK;
- skb->len = 17;
- }
-
- return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
-}
-
-/**
- * igb_tx_timeout - Respond to a Tx Hang
- * @netdev: network interface device structure
- **/
-static void igb_tx_timeout(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
-
- /* Do the reset outside of interrupt context */
- adapter->tx_timeout_count++;
-
- if (hw->mac.type >= e1000_82580)
- hw->dev_spec._82575.global_device_reset = true;
-
- schedule_work(&adapter->reset_task);
- E1000_WRITE_REG(hw, E1000_EICS,
- (adapter->eims_enable_mask & ~adapter->eims_other));
-}
-
-static void igb_reset_task(struct work_struct *work)
-{
- struct igb_adapter *adapter;
- adapter = container_of(work, struct igb_adapter, reset_task);
-
- igb_reinit_locked(adapter);
-}
-
-/**
- * igb_get_stats - Get System Network Statistics
- * @netdev: network interface device structure
- *
- * Returns the address of the device statistics structure.
- * The statistics are updated here and also from the timer callback.
- **/
-static struct net_device_stats *igb_get_stats(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (!test_bit(__IGB_RESETTING, &adapter->state))
- igb_update_stats(adapter);
-
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- /* only return the current stats */
- return &netdev->stats;
-#else
- /* only return the current stats */
- return &adapter->net_stats;
-#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
-}
-
-/**
- * igb_change_mtu - Change the Maximum Transfer Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns 0 on success, negative on failure
- **/
-static int igb_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- struct pci_dev *pdev = adapter->pdev;
- int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-
- if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
- dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
- return -EINVAL;
- }
-
-#define MAX_STD_JUMBO_FRAME_SIZE 9238
- if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
- dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
- return -EINVAL;
- }
-
- /* adjust max frame to be at least the size of a standard frame */
- if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
- max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
-
- while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
-
- /* igb_down has a dependency on max_frame_size */
- adapter->max_frame_size = max_frame;
-
- if (netif_running(netdev))
- igb_down(adapter);
-
- dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
- netdev->mtu, new_mtu);
- netdev->mtu = new_mtu;
- hw->dev_spec._82575.mtu = new_mtu;
-
- if (netif_running(netdev))
- igb_up(adapter);
- else
- igb_reset(adapter);
-
- clear_bit(__IGB_RESETTING, &adapter->state);
-
- return 0;
-}
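-
-/* Worked example (editor's note, not in the original source): for the
- * default MTU of 1500, max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4)
- * + VLAN_HLEN (4) = 1522, the classic maximum VLAN-tagged Ethernet frame.
- * An MTU of 9216 gives max_frame = 9238, which is exactly the
- * MAX_STD_JUMBO_FRAME_SIZE limit enforced above.
- */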
-
-/**
- * igb_update_stats - Update the board statistics counters
- * @adapter: board private structure
- **/
-
-void igb_update_stats(struct igb_adapter *adapter)
-{
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats *net_stats = &adapter->netdev->stats;
-#else
- struct net_device_stats *net_stats = &adapter->net_stats;
-#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
- struct e1000_hw *hw = &adapter->hw;
-#ifdef HAVE_PCI_ERS
- struct pci_dev *pdev = adapter->pdev;
-#endif
- u32 reg, mpc;
- u16 phy_tmp;
- int i;
- u64 bytes, packets;
-#ifndef IGB_NO_LRO
- u32 flushed = 0, coal = 0;
- struct igb_q_vector *q_vector;
-#endif
-
-#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
-
- /*
- * Prevent stats update while adapter is being reset, or if the pci
- * connection is down.
- */
- if (adapter->link_speed == 0)
- return;
-#ifdef HAVE_PCI_ERS
- if (pci_channel_offline(pdev))
- return;
-
-#endif
-#ifndef IGB_NO_LRO
- for (i = 0; i < adapter->num_q_vectors; i++) {
- q_vector = adapter->q_vector[i];
- if (!q_vector)
- continue;
- flushed += q_vector->lrolist.stats.flushed;
- coal += q_vector->lrolist.stats.coal;
- }
- adapter->lro_stats.flushed = flushed;
- adapter->lro_stats.coal = coal;
-
-#endif
- bytes = 0;
- packets = 0;
- for (i = 0; i < adapter->num_rx_queues; i++) {
- u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
- struct igb_ring *ring = adapter->rx_ring[i];
- ring->rx_stats.drops += rqdpc_tmp;
- net_stats->rx_fifo_errors += rqdpc_tmp;
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- if (!ring->vmdq_netdev) {
- bytes += ring->rx_stats.bytes;
- packets += ring->rx_stats.packets;
- }
-#else
- bytes += ring->rx_stats.bytes;
- packets += ring->rx_stats.packets;
-#endif
- }
-
- net_stats->rx_bytes = bytes;
- net_stats->rx_packets = packets;
-
- bytes = 0;
- packets = 0;
- for (i = 0; i < adapter->num_tx_queues; i++) {
- struct igb_ring *ring = adapter->tx_ring[i];
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- if (!ring->vmdq_netdev) {
- bytes += ring->tx_stats.bytes;
- packets += ring->tx_stats.packets;
- }
-#else
- bytes += ring->tx_stats.bytes;
- packets += ring->tx_stats.packets;
-#endif
- }
- net_stats->tx_bytes = bytes;
- net_stats->tx_packets = packets;
-
- /* read stats registers */
- adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
- adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
- adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
- E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
- adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
- adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
- adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
-
- adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
- adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
- adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
- adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
- adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
- adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
- adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
- adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
-
- mpc = E1000_READ_REG(hw, E1000_MPC);
- adapter->stats.mpc += mpc;
- net_stats->rx_fifo_errors += mpc;
- adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
- adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
- adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
- adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
- adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
- adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
- adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
- adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
- adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
- adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
- adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
- adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
- adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
- E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
- adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
- adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
- adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
- adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
- adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
- adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
- adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
-
- adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
- adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
- adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
- adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
- adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
- adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
-
- adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
- adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
-
- adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
- adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
-
- adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
-	/* read internal phy specific stats */
- reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
- if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
- adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
-
- /* this stat has invalid values on i210/i211 */
- if ((hw->mac.type != e1000_i210) &&
- (hw->mac.type != e1000_i211))
- adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
- }
- adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
- adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
-
- adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
- adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
- adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
- adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
- adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
- adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
- adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
- adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
- adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
-
- /* Fill out the OS statistics structure */
- net_stats->multicast = adapter->stats.mprc;
- net_stats->collisions = adapter->stats.colc;
-
- /* Rx Errors */
-
- /* RLEC on some newer hardware can be incorrect so build
- * our own version based on RUC and ROC */
- net_stats->rx_errors = adapter->stats.rxerrc +
- adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.ruc + adapter->stats.roc +
- adapter->stats.cexterr;
- net_stats->rx_length_errors = adapter->stats.ruc +
- adapter->stats.roc;
- net_stats->rx_crc_errors = adapter->stats.crcerrs;
- net_stats->rx_frame_errors = adapter->stats.algnerrc;
- net_stats->rx_missed_errors = adapter->stats.mpc;
-
- /* Tx Errors */
- net_stats->tx_errors = adapter->stats.ecol +
- adapter->stats.latecol;
- net_stats->tx_aborted_errors = adapter->stats.ecol;
- net_stats->tx_window_errors = adapter->stats.latecol;
- net_stats->tx_carrier_errors = adapter->stats.tncrs;
-
- /* Tx Dropped needs to be maintained elsewhere */
-
- /* Phy Stats */
- if (hw->phy.media_type == e1000_media_type_copper) {
- if ((adapter->link_speed == SPEED_1000) &&
- (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
- phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
- adapter->phy_stats.idle_errors += phy_tmp;
- }
- }
-
- /* Management Stats */
- adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
- adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
- if (hw->mac.type > e1000_82580) {
- adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
- adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
- adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
- adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
- }
-}
-
-static irqreturn_t igb_msix_other(int irq, void *data)
-{
- struct igb_adapter *adapter = data;
- struct e1000_hw *hw = &adapter->hw;
- u32 icr = E1000_READ_REG(hw, E1000_ICR);
- /* reading ICR causes bit 31 of EICR to be cleared */
-
- if (icr & E1000_ICR_DRSTA)
- schedule_work(&adapter->reset_task);
-
- if (icr & E1000_ICR_DOUTSYNC) {
- /* HW is reporting DMA is out of sync */
- adapter->stats.doosync++;
-		/* The DMA Out of Sync is also an indication of a spoof event
- * in IOV mode. Check the Wrong VM Behavior register to
- * see if it is really a spoof event. */
- igb_check_wvbr(adapter);
- }
-
- /* Check for a mailbox event */
- if (icr & E1000_ICR_VMMB)
- igb_msg_task(adapter);
-
- if (icr & E1000_ICR_LSC) {
- hw->mac.get_link_status = 1;
- /* guard against interrupt when we're going down */
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
- }
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (icr & E1000_ICR_TS) {
- u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
-
- if (tsicr & E1000_TSICR_TXTS) {
- /* acknowledge the interrupt */
- E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
- /* retrieve hardware timestamp */
- schedule_work(&adapter->ptp_tx_work);
- }
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- /* Check for MDD event */
- if (icr & E1000_ICR_MDDET)
- igb_process_mdd_event(adapter);
-
- E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
-
- return IRQ_HANDLED;
-}
-
-static void igb_write_itr(struct igb_q_vector *q_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- u32 itr_val = q_vector->itr_val & 0x7FFC;
-
- if (!q_vector->set_itr)
- return;
-
- if (!itr_val)
- itr_val = 0x4;
-
- if (adapter->hw.mac.type == e1000_82575)
- itr_val |= itr_val << 16;
- else
- itr_val |= E1000_EITR_CNT_IGNR;
-
- writel(itr_val, q_vector->itr_register);
- q_vector->set_itr = 0;
-}
-
-static irqreturn_t igb_msix_ring(int irq, void *data)
-{
- struct igb_q_vector *q_vector = data;
-
- /* Write the ITR value calculated from the previous interrupt. */
- igb_write_itr(q_vector);
-
- napi_schedule(&q_vector->napi);
-
- return IRQ_HANDLED;
-}
-
-#ifdef IGB_DCA
-static void igb_update_tx_dca(struct igb_adapter *adapter,
- struct igb_ring *tx_ring,
- int cpu)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
-
- if (hw->mac.type != e1000_82575)
- txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
-
- /*
- * We can enable relaxed ordering for reads, but not writes when
- * DCA is enabled. This is due to a known issue in some chipsets
- * which will cause the DCA tag to be cleared.
- */
- txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
- E1000_DCA_TXCTRL_DATA_RRO_EN |
- E1000_DCA_TXCTRL_DESC_DCA_EN;
-
- E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
-}
-
-static void igb_update_rx_dca(struct igb_adapter *adapter,
- struct igb_ring *rx_ring,
- int cpu)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
-
- if (hw->mac.type != e1000_82575)
- rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
-
- /*
- * We can enable relaxed ordering for reads, but not writes when
- * DCA is enabled. This is due to a known issue in some chipsets
- * which will cause the DCA tag to be cleared.
- */
- rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
- E1000_DCA_RXCTRL_DESC_DCA_EN;
-
- E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
-}
-
-static void igb_update_dca(struct igb_q_vector *q_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- int cpu = get_cpu();
-
- if (q_vector->cpu == cpu)
- goto out_no_update;
-
- if (q_vector->tx.ring)
- igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
-
- if (q_vector->rx.ring)
- igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
-
- q_vector->cpu = cpu;
-out_no_update:
- put_cpu();
-}
-
-static void igb_setup_dca(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
-
- if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
- return;
-
- /* Always use CB2 mode, difference is masked in the CB driver. */
- E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
-
- for (i = 0; i < adapter->num_q_vectors; i++) {
- adapter->q_vector[i]->cpu = -1;
- igb_update_dca(adapter->q_vector[i]);
- }
-}
-
-static int __igb_notify_dca(struct device *dev, void *data)
-{
- struct net_device *netdev = dev_get_drvdata(dev);
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct pci_dev *pdev = adapter->pdev;
- struct e1000_hw *hw = &adapter->hw;
- unsigned long event = *(unsigned long *)data;
-
- switch (event) {
- case DCA_PROVIDER_ADD:
- /* if already enabled, don't do it again */
- if (adapter->flags & IGB_FLAG_DCA_ENABLED)
- break;
- if (dca_add_requester(dev) == E1000_SUCCESS) {
- adapter->flags |= IGB_FLAG_DCA_ENABLED;
- dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
- igb_setup_dca(adapter);
- break;
- }
- /* Fall Through since DCA is disabled. */
- case DCA_PROVIDER_REMOVE:
- if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
- /* without this a class_device is left
- * hanging around in the sysfs model */
- dca_remove_requester(dev);
- dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
- adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
- E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
- }
- break;
- }
-
- return E1000_SUCCESS;
-}
-
-static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
- void *p)
-{
- int ret_val;
-
- ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
- __igb_notify_dca);
-
- return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
-}
-#endif /* IGB_DCA */
-
-static int igb_vf_configure(struct igb_adapter *adapter, int vf)
-{
- unsigned char mac_addr[ETH_ALEN];
-
- random_ether_addr(mac_addr);
- igb_set_vf_mac(adapter, vf, mac_addr);
-
-#ifdef IFLA_VF_MAX
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
- /* By default spoof check is enabled for all VFs */
- adapter->vf_data[vf].spoofchk_enabled = true;
-#endif
-#endif
-
- return true;
-}
-
-static void igb_ping_all_vfs(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 ping;
- int i;
-
- for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
- ping = E1000_PF_CONTROL_MSG;
- if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
- ping |= E1000_VT_MSGTYPE_CTS;
- e1000_write_mbx(hw, &ping, 1, i);
- }
-}
-
-/**
- * igb_mta_set - Set multicast filter table address
- * @adapter: pointer to the adapter structure
- * @hash_value: determines the MTA register and bit to set
- *
- * The multicast table address is a register array of 32-bit registers.
- * The hash_value is used to determine what register the bit is in, the
- * current value is read, the new bit is OR'd in and the new value is
- * written back into the register.
- **/
-void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 hash_bit, hash_reg, mta;
-
- /*
- * The MTA is a register array of 32-bit registers. It is
- * treated like an array of (32*mta_reg_count) bits. We want to
- * set bit BitArray[hash_value]. So we figure out what register
- * the bit is in, read it, OR in the new bit, then write
- * back the new value. The (hw->mac.mta_reg_count - 1) serves as a
- * mask to bits 31:5 of the hash value which gives us the
- * register we're modifying. The hash bit within that register
- * is determined by the lower 5 bits of the hash value.
- */
- hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
- hash_bit = hash_value & 0x1F;
-
- mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
-
- mta |= (1 << hash_bit);
-
- E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
- E1000_WRITE_FLUSH(hw);
-}
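-
-/* Worked example (editor's illustration, not in the original source,
- * assuming mta_reg_count = 128): a hash_value of 0x163 gives
- * hash_reg = (0x163 >> 5) & 0x7F = 11 and hash_bit = 0x163 & 0x1F = 3,
- * so bit 3 of MTA register 11 is read, OR'd in and written back, exactly
- * as the comment above describes.
- */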
-
-static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
-{
-
- struct e1000_hw *hw = &adapter->hw;
- u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
- struct vf_data_storage *vf_data = &adapter->vf_data[vf];
-
- vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
- IGB_VF_FLAG_MULTI_PROMISC);
- vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
-
-#ifdef IGB_ENABLE_VF_PROMISC
- if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
- vmolr |= E1000_VMOLR_ROPE;
- vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
- *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
- }
-#endif
- if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
- vmolr |= E1000_VMOLR_MPME;
- vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
- *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
- } else {
- /*
- * if we have hashes and we are clearing a multicast promisc
- * flag we need to write the hashes to the MTA as this step
- * was previously skipped
- */
- if (vf_data->num_vf_mc_hashes > 30) {
- vmolr |= E1000_VMOLR_MPME;
- } else if (vf_data->num_vf_mc_hashes) {
- int j;
- vmolr |= E1000_VMOLR_ROMPE;
- for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
- igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
- }
- }
-
- E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
-
- /* there are flags left unprocessed, likely not supported */
- if (*msgbuf & E1000_VT_MSGINFO_MASK)
- return -EINVAL;
-
- return 0;
-
-}
-
-static int igb_set_vf_multicasts(struct igb_adapter *adapter,
- u32 *msgbuf, u32 vf)
-{
- int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
- u16 *hash_list = (u16 *)&msgbuf[1];
- struct vf_data_storage *vf_data = &adapter->vf_data[vf];
- int i;
-
- /* salt away the number of multicast addresses assigned
-	 * to this VF for later use to restore when the PF multicast
- * list changes
- */
- vf_data->num_vf_mc_hashes = n;
-
- /* only up to 30 hash values supported */
- if (n > 30)
- n = 30;
-
- /* store the hashes for later use */
- for (i = 0; i < n; i++)
- vf_data->vf_mc_hashes[i] = hash_list[i];
-
- /* Flush and reset the mta with the new values */
- igb_set_rx_mode(adapter->netdev);
-
- return 0;
-}
-
-static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct vf_data_storage *vf_data;
- int i, j;
-
- for (i = 0; i < adapter->vfs_allocated_count; i++) {
- u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
- vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
-
- vf_data = &adapter->vf_data[i];
-
- if ((vf_data->num_vf_mc_hashes > 30) ||
- (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
- vmolr |= E1000_VMOLR_MPME;
- } else if (vf_data->num_vf_mc_hashes) {
- vmolr |= E1000_VMOLR_ROMPE;
- for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
- igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
- }
- E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
- }
-}
-
-static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 pool_mask, reg, vid;
- u16 vlan_default;
- int i;
-
- pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
-
- /* Find the vlan filter for this id */
- for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
- reg = E1000_READ_REG(hw, E1000_VLVF(i));
-
- /* remove the vf from the pool */
- reg &= ~pool_mask;
-
- /* if pool is empty then remove entry from vfta */
- if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
- (reg & E1000_VLVF_VLANID_ENABLE)) {
-			vid = reg & E1000_VLVF_VLANID_MASK;
-			reg = 0;
- igb_vfta_set(adapter, vid, FALSE);
- }
-
- E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
- }
-
- adapter->vf_data[vf].vlans_enabled = 0;
-
- vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
- if (vlan_default)
- igb_vlvf_set(adapter, vlan_default, true, vf);
-}
-
-s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 reg, i;
-
- /* The vlvf table only exists on 82576 hardware and newer */
- if (hw->mac.type < e1000_82576)
- return -1;
-
- /* we only need to do this if VMDq is enabled */
- if (!adapter->vmdq_pools)
- return -1;
-
- /* Find the vlan filter for this id */
- for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
- reg = E1000_READ_REG(hw, E1000_VLVF(i));
- if ((reg & E1000_VLVF_VLANID_ENABLE) &&
- vid == (reg & E1000_VLVF_VLANID_MASK))
- break;
- }
-
- if (add) {
- if (i == E1000_VLVF_ARRAY_SIZE) {
- /* Did not find a matching VLAN ID entry that was
- * enabled. Search for a free filter entry, i.e.
- * one without the enable bit set
- */
- for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
- reg = E1000_READ_REG(hw, E1000_VLVF(i));
- if (!(reg & E1000_VLVF_VLANID_ENABLE))
- break;
- }
- }
- if (i < E1000_VLVF_ARRAY_SIZE) {
- /* Found an enabled/available entry */
- reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
-
- /* if !enabled we need to set this up in vfta */
- if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
- /* add VID to filter table */
- igb_vfta_set(adapter, vid, TRUE);
- reg |= E1000_VLVF_VLANID_ENABLE;
- }
- reg &= ~E1000_VLVF_VLANID_MASK;
- reg |= vid;
- E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
-
- /* do not modify RLPML for PF devices */
- if (vf >= adapter->vfs_allocated_count)
- return E1000_SUCCESS;
-
- if (!adapter->vf_data[vf].vlans_enabled) {
- u32 size;
- reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
- size = reg & E1000_VMOLR_RLPML_MASK;
- size += 4;
- reg &= ~E1000_VMOLR_RLPML_MASK;
- reg |= size;
- E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
- }
-
- adapter->vf_data[vf].vlans_enabled++;
- }
- } else {
- if (i < E1000_VLVF_ARRAY_SIZE) {
- /* remove vf from the pool */
- reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
- /* if pool is empty then remove entry from vfta */
- if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
- reg = 0;
- igb_vfta_set(adapter, vid, FALSE);
- }
- E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
-
- /* do not modify RLPML for PF devices */
- if (vf >= adapter->vfs_allocated_count)
- return E1000_SUCCESS;
-
- adapter->vf_data[vf].vlans_enabled--;
- if (!adapter->vf_data[vf].vlans_enabled) {
- u32 size;
- reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
- size = reg & E1000_VMOLR_RLPML_MASK;
- size -= 4;
- reg &= ~E1000_VMOLR_RLPML_MASK;
- reg |= size;
- E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
- }
- }
- }
- return E1000_SUCCESS;
-}
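-
-/* Editor's note (not part of the original source): each VLVF entry packs
- * three things the code above manipulates only through named constants:
- * the VLAN id under E1000_VLVF_VLANID_MASK, one pool-select bit per
- * VF/PF pool starting at E1000_VLVF_POOLSEL_SHIFT, and the
- * E1000_VLVF_VLANID_ENABLE flag.  Adding a VF ORs in
- * 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); once no pool bits remain set the
- * entry is cleared and the VID is also dropped from the VFTA.
- */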
-
-#ifdef IFLA_VF_MAX
-static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- if (vid)
- E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
- else
- E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
-}
-
-static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-#ifdef HAVE_VF_VLAN_PROTO
- int vf, u16 vlan, u8 qos, __be16 vlan_proto)
-#else
- int vf, u16 vlan, u8 qos)
-#endif
-{
- int err = 0;
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- /* VLAN IDs accepted range 0-4094 */
- if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
- return -EINVAL;
-
-#ifdef HAVE_VF_VLAN_PROTO
- if (vlan_proto != htons(ETH_P_8021Q))
- return -EPROTONOSUPPORT;
-#endif
-
- if (vlan || qos) {
- err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
- if (err)
- goto out;
- igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
- igb_set_vmolr(adapter, vf, !vlan);
- adapter->vf_data[vf].pf_vlan = vlan;
- adapter->vf_data[vf].pf_qos = qos;
- igb_set_vf_vlan_strip(adapter, vf, true);
- dev_info(&adapter->pdev->dev,
- "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- dev_warn(&adapter->pdev->dev,
- "The VF VLAN has been set,"
- " but the PF device is not up.\n");
- dev_warn(&adapter->pdev->dev,
- "Bring the PF device up before"
- " attempting to use the VF device.\n");
- }
- } else {
- if (adapter->vf_data[vf].pf_vlan)
- dev_info(&adapter->pdev->dev,
- "Clearing VLAN on VF %d\n", vf);
- igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
- false, vf);
- igb_set_vmvir(adapter, vlan, vf);
- igb_set_vmolr(adapter, vf, true);
- igb_set_vf_vlan_strip(adapter, vf, false);
- adapter->vf_data[vf].pf_vlan = 0;
- adapter->vf_data[vf].pf_qos = 0;
- }
-out:
- return err;
-}
-
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
-static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
- bool setting)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 dtxswc, reg_offset;
-
- if (!adapter->vfs_allocated_count)
- return -EOPNOTSUPP;
-
- if (vf >= adapter->vfs_allocated_count)
- return -EINVAL;
-
- reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
- dtxswc = E1000_READ_REG(hw, reg_offset);
- if (setting)
- dtxswc |= ((1 << vf) |
- (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
- else
- dtxswc &= ~((1 << vf) |
- (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
- E1000_WRITE_REG(hw, reg_offset, dtxswc);
-
- adapter->vf_data[vf].spoofchk_enabled = setting;
- return E1000_SUCCESS;
-}
-#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
-#endif /* IFLA_VF_MAX */
-
-static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
- u32 reg;
-
- /* Find the vlan filter for this id */
- for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
- reg = E1000_READ_REG(hw, E1000_VLVF(i));
- if ((reg & E1000_VLVF_VLANID_ENABLE) &&
- vid == (reg & E1000_VLVF_VLANID_MASK))
- break;
- }
-
- if (i >= E1000_VLVF_ARRAY_SIZE)
- i = -1;
-
- return i;
-}
-
-static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
- int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
- int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
- int err = 0;
-
- if (vid)
- igb_set_vf_vlan_strip(adapter, vf, true);
- else
- igb_set_vf_vlan_strip(adapter, vf, false);
-
- /* If in promiscuous mode we need to make sure the PF also has
- * the VLAN filter set.
- */
- if (add && (adapter->netdev->flags & IFF_PROMISC))
- err = igb_vlvf_set(adapter, vid, add,
- adapter->vfs_allocated_count);
- if (err)
- goto out;
-
- err = igb_vlvf_set(adapter, vid, add, vf);
-
- if (err)
- goto out;
-
- /* Go through all the checks to see if the VLAN filter should
- * be wiped completely.
- */
- if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
- u32 vlvf, bits;
-
- int regndx = igb_find_vlvf_entry(adapter, vid);
- if (regndx < 0)
- goto out;
- /* See if any other pools are set for this VLAN filter
- * entry other than the PF.
- */
- vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx));
- bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
- adapter->vfs_allocated_count);
- /* If the filter was removed then ensure PF pool bit
- * is cleared if the PF only added itself to the pool
- * because the PF is in promiscuous mode.
- */
- if ((vlvf & VLAN_VID_MASK) == vid &&
-#ifndef HAVE_VLAN_RX_REGISTER
- !test_bit(vid, adapter->active_vlans) &&
-#endif
- !bits)
- igb_vlvf_set(adapter, vid, add,
- adapter->vfs_allocated_count);
- }
-
-out:
- return err;
-}
-
-static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- /* clear flags except flag that the PF has set the MAC */
- adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
- adapter->vf_data[vf].last_nack = jiffies;
-
- /* reset offloads to defaults */
- igb_set_vmolr(adapter, vf, true);
-
- /* reset vlans for device */
- igb_clear_vf_vfta(adapter, vf);
-#ifdef IFLA_VF_MAX
- if (adapter->vf_data[vf].pf_vlan)
- igb_ndo_set_vf_vlan(adapter->netdev, vf,
- adapter->vf_data[vf].pf_vlan,
-#ifdef HAVE_VF_VLAN_PROTO
- adapter->vf_data[vf].pf_qos,
- htons(ETH_P_8021Q));
-#else
- adapter->vf_data[vf].pf_qos);
-#endif
- else
- igb_clear_vf_vfta(adapter, vf);
-#endif
-
- /* reset multicast table array for vf */
- adapter->vf_data[vf].num_vf_mc_hashes = 0;
-
- /* Flush and reset the mta with the new values */
- igb_set_rx_mode(adapter->netdev);
-
- /*
-	 * Reset the VF's TDWBAL and TDWBAH registers, which are not
- * cleared by a VFLR
- */
- E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
- E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
- if (hw->mac.type == e1000_82576) {
- E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
- E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
- }
-}
-
-static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
-{
- unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
-
- /* generate a new mac address as we were hotplug removed/added */
- if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
- random_ether_addr(vf_mac);
-
- /* process remaining reset events */
- igb_vf_reset(adapter, vf);
-}
-
-static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
- unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
- u32 reg, msgbuf[3];
- u8 *addr = (u8 *)(&msgbuf[1]);
-
- /* process all the same items cleared in a function level reset */
- igb_vf_reset(adapter, vf);
-
- /* set vf mac address */
- igb_del_mac_filter(adapter, vf_mac, vf);
- igb_add_mac_filter(adapter, vf_mac, vf);
-
- /* enable transmit and receive for vf */
- reg = E1000_READ_REG(hw, E1000_VFTE);
- E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
- reg = E1000_READ_REG(hw, E1000_VFRE);
- E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
-
- adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
-
- /* reply to reset with ack and vf mac address */
- msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
- memcpy(addr, vf_mac, 6);
- e1000_write_mbx(hw, msgbuf, 3, vf);
-}
-
-static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
-{
- /*
- * The VF MAC Address is stored in a packed array of bytes
-	 * starting at the second 32-bit word of the msg array
- */
- unsigned char *addr = (unsigned char *)&msg[1];
- int err = -1;
-
- if (is_valid_ether_addr(addr))
- err = igb_set_vf_mac(adapter, vf, addr);
-
- return err;
-}
-
-static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct vf_data_storage *vf_data = &adapter->vf_data[vf];
- u32 msg = E1000_VT_MSGTYPE_NACK;
-
- /* if device isn't clear to send it shouldn't be reading either */
- if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
- time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
- e1000_write_mbx(hw, &msg, 1, vf);
- vf_data->last_nack = jiffies;
- }
-}
-
-static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
-{
- struct pci_dev *pdev = adapter->pdev;
- u32 msgbuf[E1000_VFMAILBOX_SIZE];
- struct e1000_hw *hw = &adapter->hw;
- struct vf_data_storage *vf_data = &adapter->vf_data[vf];
- s32 retval;
-
- retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
-
- if (retval) {
- dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
- return;
- }
-
- /* this is a message we already processed, do nothing */
- if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
- return;
-
- /*
- * until the vf completes a reset it should not be
- * allowed to start any configuration.
- */
-
- if (msgbuf[0] == E1000_VF_RESET) {
- igb_vf_reset_msg(adapter, vf);
- return;
- }
-
- if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
- msgbuf[0] = E1000_VT_MSGTYPE_NACK;
- if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
- e1000_write_mbx(hw, msgbuf, 1, vf);
- vf_data->last_nack = jiffies;
- }
- return;
- }
-
- switch ((msgbuf[0] & 0xFFFF)) {
- case E1000_VF_SET_MAC_ADDR:
- retval = -EINVAL;
-#ifndef IGB_DISABLE_VF_MAC_SET
- if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
- retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
- else
- DPRINTK(DRV, INFO,
- "VF %d attempted to override administratively "
- "set MAC address\nReload the VF driver to "
- "resume operations\n", vf);
-#endif
- break;
- case E1000_VF_SET_PROMISC:
- retval = igb_set_vf_promisc(adapter, msgbuf, vf);
- break;
- case E1000_VF_SET_MULTICAST:
- retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
- break;
- case E1000_VF_SET_LPE:
- retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
- break;
- case E1000_VF_SET_VLAN:
- retval = -1;
-#ifdef IFLA_VF_MAX
- if (vf_data->pf_vlan)
- DPRINTK(DRV, INFO,
- "VF %d attempted to override administratively "
- "set VLAN tag\nReload the VF driver to "
- "resume operations\n", vf);
- else
-#endif
- retval = igb_set_vf_vlan(adapter, msgbuf, vf);
- break;
- default:
- dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
- retval = -E1000_ERR_MBX;
- break;
- }
-
- /* notify the VF of the results of what it sent us */
- if (retval)
- msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
- else
- msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
-
- msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
-
- e1000_write_mbx(hw, msgbuf, 1, vf);
-}
-
-static void igb_msg_task(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 vf;
-
- for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
- /* process any reset requests */
- if (!e1000_check_for_rst(hw, vf))
- igb_vf_reset_event(adapter, vf);
-
- /* process any messages pending */
- if (!e1000_check_for_msg(hw, vf))
- igb_rcv_msg_from_vf(adapter, vf);
-
- /* process any acks */
- if (!e1000_check_for_ack(hw, vf))
- igb_rcv_ack_from_vf(adapter, vf);
- }
-}
-
-/**
- * igb_set_uta - Set unicast filter table address
- * @adapter: board private structure
- *
- * The unicast table address is a register array of 32-bit registers.
- * The table is meant to be used in a way similar to how the MTA is used
- * however, due to certain limitations in the hardware it is necessary to
- * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
- * enable bit to allow vlan tag stripping when promiscuous mode is enabled
- **/
-static void igb_set_uta(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
-
- /* The UTA table only exists on 82576 hardware and newer */
- if (hw->mac.type < e1000_82576)
- return;
-
- /* we only need to do this if VMDq is enabled */
- if (!adapter->vmdq_pools)
- return;
-
- for (i = 0; i < hw->mac.uta_reg_count; i++)
- E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
-}
-
-/**
- * igb_intr_msi - Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-static irqreturn_t igb_intr_msi(int irq, void *data)
-{
- struct igb_adapter *adapter = data;
- struct igb_q_vector *q_vector = adapter->q_vector[0];
- struct e1000_hw *hw = &adapter->hw;
- /* read ICR disables interrupts using IAM */
- u32 icr = E1000_READ_REG(hw, E1000_ICR);
-
- igb_write_itr(q_vector);
-
- if (icr & E1000_ICR_DRSTA)
- schedule_work(&adapter->reset_task);
-
- if (icr & E1000_ICR_DOUTSYNC) {
- /* HW is reporting DMA is out of sync */
- adapter->stats.doosync++;
- }
-
- if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
- hw->mac.get_link_status = 1;
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
- }
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (icr & E1000_ICR_TS) {
- u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
-
- if (tsicr & E1000_TSICR_TXTS) {
- /* acknowledge the interrupt */
- E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
- /* retrieve hardware timestamp */
- schedule_work(&adapter->ptp_tx_work);
- }
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- napi_schedule(&q_vector->napi);
-
- return IRQ_HANDLED;
-}
-
-/**
- * igb_intr - Legacy Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-static irqreturn_t igb_intr(int irq, void *data)
-{
- struct igb_adapter *adapter = data;
- struct igb_q_vector *q_vector = adapter->q_vector[0];
- struct e1000_hw *hw = &adapter->hw;
- /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
- * need for the IMC write */
- u32 icr = E1000_READ_REG(hw, E1000_ICR);
-
- /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
- * not set, then the adapter didn't send an interrupt */
- if (!(icr & E1000_ICR_INT_ASSERTED))
- return IRQ_NONE;
-
- igb_write_itr(q_vector);
-
- if (icr & E1000_ICR_DRSTA)
- schedule_work(&adapter->reset_task);
-
- if (icr & E1000_ICR_DOUTSYNC) {
- /* HW is reporting DMA is out of sync */
- adapter->stats.doosync++;
- }
-
- if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
- hw->mac.get_link_status = 1;
- /* guard against interrupt when we're going down */
- if (!test_bit(__IGB_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
- }
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (icr & E1000_ICR_TS) {
- u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
-
- if (tsicr & E1000_TSICR_TXTS) {
- /* acknowledge the interrupt */
- E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
- /* retrieve hardware timestamp */
- schedule_work(&adapter->ptp_tx_work);
- }
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- napi_schedule(&q_vector->napi);
-
- return IRQ_HANDLED;
-}
-
-void igb_ring_irq_enable(struct igb_q_vector *q_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- struct e1000_hw *hw = &adapter->hw;
-
- if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
- (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
- if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
- igb_set_itr(q_vector);
- else
- igb_update_ring_itr(q_vector);
- }
-
- if (!test_bit(__IGB_DOWN, &adapter->state)) {
- if (adapter->msix_entries)
- E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
- else
- igb_irq_enable(adapter);
- }
-}
-
-/**
- * igb_poll - NAPI Rx polling callback
- * @napi: napi polling structure
- * @budget: count of how many packets we should handle
- **/
-static int igb_poll(struct napi_struct *napi, int budget)
-{
- struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
- bool clean_complete = true;
-
-#ifdef IGB_DCA
- if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
- igb_update_dca(q_vector);
-#endif
- if (q_vector->tx.ring)
- clean_complete = igb_clean_tx_irq(q_vector);
-
- if (q_vector->rx.ring)
- clean_complete &= igb_clean_rx_irq(q_vector, budget);
-
-#ifndef HAVE_NETDEV_NAPI_LIST
- /* if netdev is disabled we need to stop polling */
- if (!netif_running(q_vector->adapter->netdev))
- clean_complete = true;
-
-#endif
- /* If all work not completed, return budget and keep polling */
- if (!clean_complete)
- return budget;
-
- /* If not enough Rx work done, exit the polling mode */
- napi_complete(napi);
- igb_ring_irq_enable(q_vector);
-
- return 0;
-}
-
-/**
- * igb_clean_tx_irq - Reclaim resources after transmit completes
- * @q_vector: pointer to q_vector containing needed info
- * returns TRUE if ring is completely cleaned
- **/
-static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- struct igb_ring *tx_ring = q_vector->tx.ring;
- struct igb_tx_buffer *tx_buffer;
- union e1000_adv_tx_desc *tx_desc;
- unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = q_vector->tx.work_limit;
- unsigned int i = tx_ring->next_to_clean;
-
- if (test_bit(__IGB_DOWN, &adapter->state))
- return true;
-
- tx_buffer = &tx_ring->tx_buffer_info[i];
- tx_desc = IGB_TX_DESC(tx_ring, i);
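-	/* Editor's note (comment added for clarity, not in the original
-	 * source): 'i' is biased downward by the ring size so that, after
-	 * each increment in the loop below, a value of 0 means the end of
-	 * the ring was reached and tx_buffer/tx_desc must wrap to the
-	 * start; the bias is undone by the final "i += tx_ring->count"
-	 * before next_to_clean is written back.
-	 */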
- i -= tx_ring->count;
-
- do {
- union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
-
- /* if next_to_watch is not set then there is no work pending */
- if (!eop_desc)
- break;
-
- /* prevent any other reads prior to eop_desc */
- read_barrier_depends();
-
- /* if DD is not set pending work has not been completed */
- if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
- break;
-
- /* clear next_to_watch to prevent false hangs */
- tx_buffer->next_to_watch = NULL;
-
- /* update the statistics for this packet */
- total_bytes += tx_buffer->bytecount;
- total_packets += tx_buffer->gso_segs;
-
- /* free the skb */
- dev_kfree_skb_any(tx_buffer->skb);
-
- /* unmap skb header data */
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
-
- /* clear tx_buffer data */
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
-
- /* clear last DMA location and unmap remaining buffers */
- while (tx_desc != eop_desc) {
- tx_buffer++;
- tx_desc++;
- i++;
- if (unlikely(!i)) {
- i -= tx_ring->count;
- tx_buffer = tx_ring->tx_buffer_info;
- tx_desc = IGB_TX_DESC(tx_ring, 0);
- }
-
- /* unmap any remaining paged data */
- if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buffer, len, 0);
- }
- }
-
- /* move us one more past the eop_desc for start of next pkt */
- tx_buffer++;
- tx_desc++;
- i++;
- if (unlikely(!i)) {
- i -= tx_ring->count;
- tx_buffer = tx_ring->tx_buffer_info;
- tx_desc = IGB_TX_DESC(tx_ring, 0);
- }
-
- /* issue prefetch for next Tx descriptor */
- prefetch(tx_desc);
-
- /* update budget accounting */
- budget--;
- } while (likely(budget));
-
- netdev_tx_completed_queue(txring_txq(tx_ring),
- total_packets, total_bytes);
-
- i += tx_ring->count;
- tx_ring->next_to_clean = i;
- tx_ring->tx_stats.bytes += total_bytes;
- tx_ring->tx_stats.packets += total_packets;
- q_vector->tx.total_bytes += total_bytes;
- q_vector->tx.total_packets += total_packets;
-
-#ifdef DEBUG
- if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) &&
- !(adapter->disable_hw_reset && adapter->tx_hang_detected)) {
-#else
- if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
-#endif
- struct e1000_hw *hw = &adapter->hw;
-
-		/* Detect a transmit hang in hardware; this serializes the
- * check with the clearing of time_stamp and movement of i */
- clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
- if (tx_buffer->next_to_watch &&
- time_after(jiffies, tx_buffer->time_stamp +
- (adapter->tx_timeout_factor * HZ))
- && !(E1000_READ_REG(hw, E1000_STATUS) &
- E1000_STATUS_TXOFF)) {
-
- /* detected Tx unit hang */
-#ifdef DEBUG
- adapter->tx_hang_detected = TRUE;
- if (adapter->disable_hw_reset) {
- DPRINTK(DRV, WARNING,
- "Deactivating netdev watchdog timer\n");
- if (del_timer(&netdev_ring(tx_ring)->watchdog_timer))
- dev_put(netdev_ring(tx_ring));
-#ifndef HAVE_NET_DEVICE_OPS
- netdev_ring(tx_ring)->tx_timeout = NULL;
-#endif
- }
-#endif /* DEBUG */
- dev_err(tx_ring->dev,
- "Detected Tx Unit Hang\n"
- " Tx Queue <%d>\n"
- " TDH <%x>\n"
- " TDT <%x>\n"
- " next_to_use <%x>\n"
- " next_to_clean <%x>\n"
- "buffer_info[next_to_clean]\n"
- " time_stamp <%lx>\n"
- " next_to_watch <%p>\n"
- " jiffies <%lx>\n"
- " desc.status <%x>\n",
- tx_ring->queue_index,
- E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
- readl(tx_ring->tail),
- tx_ring->next_to_use,
- tx_ring->next_to_clean,
- tx_buffer->time_stamp,
- tx_buffer->next_to_watch,
- jiffies,
- tx_buffer->next_to_watch->wb.status);
- if (netif_is_multiqueue(netdev_ring(tx_ring)))
- netif_stop_subqueue(netdev_ring(tx_ring),
- ring_queue_index(tx_ring));
- else
- netif_stop_queue(netdev_ring(tx_ring));
-
- /* we are about to reset, no point in enabling stuff */
- return true;
- }
- }
-
-#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
- if (unlikely(total_packets &&
- netif_carrier_ok(netdev_ring(tx_ring)) &&
- igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
- /* Make sure that anybody stopping the queue after this
- * sees the new next_to_clean.
- */
- smp_mb();
- if (netif_is_multiqueue(netdev_ring(tx_ring))) {
- if (__netif_subqueue_stopped(netdev_ring(tx_ring),
- ring_queue_index(tx_ring)) &&
- !(test_bit(__IGB_DOWN, &adapter->state))) {
- netif_wake_subqueue(netdev_ring(tx_ring),
- ring_queue_index(tx_ring));
- tx_ring->tx_stats.restart_queue++;
- }
- } else {
- if (netif_queue_stopped(netdev_ring(tx_ring)) &&
- !(test_bit(__IGB_DOWN, &adapter->state))) {
- netif_wake_queue(netdev_ring(tx_ring));
- tx_ring->tx_stats.restart_queue++;
- }
- }
- }
-
- return !!budget;
-}
-
-#ifdef HAVE_VLAN_RX_REGISTER
-/**
- * igb_receive_skb - helper function to handle rx indications
- * @q_vector: structure containing interrupt and ring information
- * @skb: packet to send up
- **/
-static void igb_receive_skb(struct igb_q_vector *q_vector,
- struct sk_buff *skb)
-{
- struct vlan_group **vlgrp = netdev_priv(skb->dev);
-
- if (IGB_CB(skb)->vid) {
- if (*vlgrp) {
- vlan_gro_receive(&q_vector->napi, *vlgrp,
- IGB_CB(skb)->vid, skb);
- } else {
- dev_kfree_skb_any(skb);
- }
- } else {
- napi_gro_receive(&q_vector->napi, skb);
- }
-}
-
-#endif /* HAVE_VLAN_RX_REGISTER */
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
-/**
- * igb_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
- * @old_buff: donor buffer to have page reused
- *
- * Synchronizes page for reuse by the adapter
- **/
-static void igb_reuse_rx_page(struct igb_ring *rx_ring,
- struct igb_rx_buffer *old_buff)
-{
- struct igb_rx_buffer *new_buff;
- u16 nta = rx_ring->next_to_alloc;
-
- new_buff = &rx_ring->rx_buffer_info[nta];
-
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- /* transfer page from old buffer to new buffer */
- memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
-
- /* sync the buffer for use by the device */
- dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
- old_buff->page_offset,
- IGB_RX_BUFSZ,
- DMA_FROM_DEVICE);
-}
-
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
- struct page *page,
- unsigned int truesize)
-{
- /* avoid re-using remote pages */
- if (unlikely(page_to_nid(page) != numa_node_id()))
- return false;
-
-#if (PAGE_SIZE < 8192)
- /* if we are only owner of page we can reuse it */
- if (unlikely(page_count(page) != 1))
- return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IGB_RX_BUFSZ;
-
-#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
-
- if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
- return false;
-#endif
-
- /* bump ref count on page before it is given to the stack */
- get_page(page);
-
- return true;
-}
-
-/**
- * igb_add_rx_frag - Add contents of Rx buffer to sk_buff
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
- * @skb: sk_buff to place the data into
- *
- * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
- **/
-static bool igb_add_rx_frag(struct igb_ring *rx_ring,
- struct igb_rx_buffer *rx_buffer,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct page *page = rx_buffer->page;
- unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = IGB_RX_BUFSZ;
-#else
- unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
-#endif
-
- if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
- igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
- va += IGB_TS_HDR_LEN;
- size -= IGB_TS_HDR_LEN;
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
- /* we can reuse buffer as-is, just make sure it is local */
- if (likely(page_to_nid(page) == numa_node_id()))
- return true;
-
- /* this page cannot be reused so discard it */
- put_page(page);
- return false;
- }
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- rx_buffer->page_offset, size, truesize);
-
- return igb_can_reuse_rx_page(rx_buffer, page, truesize);
-}
-
-static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct igb_rx_buffer *rx_buffer;
- struct page *page;
-
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-
- page = rx_buffer->page;
- prefetchw(page);
-
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
-
- /* prefetch first cache line of first page */
- prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
-#endif
-
- /* allocate a skb to store the frags */
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- IGB_RX_HDR_LEN);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_failed++;
- return NULL;
- }
-
- /*
- * we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- IGB_RX_BUFSZ,
- DMA_FROM_DEVICE);
-
- /* pull page into skb */
- if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- igb_reuse_rx_page(rx_ring, rx_buffer);
- } else {
- /* we are not reusing the buffer so unmap it */
- dma_unmap_page(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE);
- }
-
- /* clear contents of rx_buffer */
- rx_buffer->page = NULL;
-
- return skb;
-}
-
-#endif
-static inline void igb_rx_checksum(struct igb_ring *ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- skb_checksum_none_assert(skb);
-
- /* Ignore Checksum bit is set */
- if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
- return;
-
- /* Rx checksum disabled via ethtool */
- if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
- return;
-
- /* TCP/UDP checksum error bit is set */
- if (igb_test_staterr(rx_desc,
- E1000_RXDEXT_STATERR_TCPE |
- E1000_RXDEXT_STATERR_IPE)) {
- /*
- * work around errata with sctp packets where the TCPE aka
- * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
- * packets, (aka let the stack check the crc32c)
- */
- if (!((skb->len == 60) &&
- test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
- ring->rx_stats.csum_err++;
-
- /* let the stack verify checksum errors */
- return;
- }
- /* It must be a TCP or UDP packet with a valid checksum */
- if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
- E1000_RXD_STAT_UDPCS))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-}
-
-#ifdef NETIF_F_RXHASH
-static inline void igb_rx_hash(struct igb_ring *ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- if (netdev_ring(ring)->features & NETIF_F_RXHASH)
- skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
- PKT_HASH_TYPE_L3);
-}
-
-#endif
-#ifndef IGB_NO_LRO
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
-/**
- * igb_merge_active_tail - merge active tail into lro skb
- * @tail: pointer to active tail in frag_list
- *
- * This function merges the length and data of an active tail into the
- * skb containing the frag_list. It resets the tail's pointer to the head,
- * but it leaves the heads pointer to tail intact.
- **/
-static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
-{
- struct sk_buff *head = IGB_CB(tail)->head;
-
- if (!head)
- return tail;
-
- head->len += tail->len;
- head->data_len += tail->len;
- head->truesize += tail->len;
-
- IGB_CB(tail)->head = NULL;
-
- return head;
-}
-
-/**
- * igb_add_active_tail - adds an active tail into the skb frag_list
- * @head: pointer to the start of the skb
- * @tail: pointer to active tail to add to frag_list
- *
- * This function adds an active tail to the end of the frag list. This tail
- * will still be receiving data so we cannot yet ad it's stats to the main
- * skb. That is done via igb_merge_active_tail.
- **/
-static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
-{
- struct sk_buff *old_tail = IGB_CB(head)->tail;
-
- if (old_tail) {
- igb_merge_active_tail(old_tail);
- old_tail->next = tail;
- } else {
- skb_shinfo(head)->frag_list = tail;
- }
-
- IGB_CB(tail)->head = head;
- IGB_CB(head)->tail = tail;
-
- IGB_CB(head)->append_cnt++;
-}
-
-/**
- * igb_close_active_frag_list - cleanup pointers on a frag_list skb
- * @head: pointer to head of an active frag list
- *
- * This function will clear the frag_tail_tracker pointer on an active
- * frag_list and returns true if the pointer was actually set
- **/
-static inline bool igb_close_active_frag_list(struct sk_buff *head)
-{
- struct sk_buff *tail = IGB_CB(head)->tail;
-
- if (!tail)
- return false;
-
- igb_merge_active_tail(tail);
-
- IGB_CB(head)->tail = NULL;
-
- return true;
-}
-
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-/**
- * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled
- * @adapter: board private structure
- * @rx_desc: pointer to the rx descriptor
- * @skb: pointer to the skb to be merged
- *
- **/
-static inline bool igb_can_lro(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct iphdr *iph = (struct iphdr *)skb->data;
- __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
-
- /* verify hardware indicates this is IPv4/TCP */
- if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
- !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))))
- return false;
-
- /* .. and LRO is enabled */
- if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
- return false;
-
- /* .. and we are not in promiscuous mode */
- if (netdev_ring(rx_ring)->flags & IFF_PROMISC)
- return false;
-
- /* .. and the header is large enough for us to read IP/TCP fields */
- if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
- return false;
-
- /* .. and there are no VLANs on packet */
- if (skb->protocol != __constant_htons(ETH_P_IP))
- return false;
-
- /* .. and we are version 4 with no options */
- if (*(u8 *)iph != 0x45)
- return false;
-
- /* .. and the packet is not fragmented */
- if (iph->frag_off & htons(IP_MF | IP_OFFSET))
- return false;
-
- /* .. and that next header is TCP */
- if (iph->protocol != IPPROTO_TCP)
- return false;
-
- return true;
-}
-
-static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
-{
- return (struct igb_lrohdr *)skb->data;
-}
-
-/**
- * igb_lro_flush - Indicate packets to upper layer.
- *
- * Update IP and TCP header part of head skb if more than one
- * skb's chained and indicate packets to upper layer.
- **/
-static void igb_lro_flush(struct igb_q_vector *q_vector,
- struct sk_buff *skb)
-{
- struct igb_lro_list *lrolist = &q_vector->lrolist;
-
- __skb_unlink(skb, &lrolist->active);
-
- if (IGB_CB(skb)->append_cnt) {
- struct igb_lrohdr *lroh = igb_lro_hdr(skb);
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- /* close any active lro contexts */
- igb_close_active_frag_list(skb);
-
-#endif
- /* incorporate ip header and re-calculate checksum */
- lroh->iph.tot_len = ntohs(skb->len);
- lroh->iph.check = 0;
-
- /* header length is 5 since we know no options exist */
- lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
-
- /* clear TCP checksum to indicate we are an LRO frame */
- lroh->th.check = 0;
-
- /* incorporate latest timestamp into the tcp header */
- if (IGB_CB(skb)->tsecr) {
- lroh->ts[2] = IGB_CB(skb)->tsecr;
- lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
- }
-#ifdef NETIF_F_GSO
-
- skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-#endif
- }
-
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_receive_skb(q_vector, skb);
-#else
- napi_gro_receive(&q_vector->napi, skb);
-#endif
- lrolist->stats.flushed++;
-}
-
-static void igb_lro_flush_all(struct igb_q_vector *q_vector)
-{
- struct igb_lro_list *lrolist = &q_vector->lrolist;
- struct sk_buff *skb, *tmp;
-
- skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
- igb_lro_flush(q_vector, skb);
-}
-
-/*
- * igb_lro_header_ok - Main LRO function.
- **/
-static void igb_lro_header_ok(struct sk_buff *skb)
-{
- struct igb_lrohdr *lroh = igb_lro_hdr(skb);
- u16 opt_bytes, data_len;
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- IGB_CB(skb)->tail = NULL;
-#endif
- IGB_CB(skb)->tsecr = 0;
- IGB_CB(skb)->append_cnt = 0;
- IGB_CB(skb)->mss = 0;
-
- /* ensure that the checksum is valid */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- return;
-
- /* If we see CE codepoint in IP header, packet is not mergeable */
- if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
- return;
-
- /* ensure no bits set besides ack or psh */
- if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
- lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
- !lroh->th.ack)
- return;
-
- /* store the total packet length */
- data_len = ntohs(lroh->iph.tot_len);
-
- /* remove any padding from the end of the skb */
- __pskb_trim(skb, data_len);
-
- /* remove header length from data length */
- data_len -= sizeof(struct igb_lrohdr);
-
- /*
- * check for timestamps. Since the only option we handle are timestamps,
- * we only have to handle the simple case of aligned timestamps
- */
- opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
- if (opt_bytes != 0) {
- if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
- !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
- TCPOLEN_TSTAMP_ALIGNED) ||
- (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP)) ||
- (lroh->ts[2] == 0)) {
- return;
- }
-
- IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
- IGB_CB(skb)->tsecr = lroh->ts[2];
-
- data_len -= TCPOLEN_TSTAMP_ALIGNED;
- }
-
- /* record data_len as mss for the packet */
- IGB_CB(skb)->mss = data_len;
- IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
-}
-
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
-static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
-{
- struct skb_shared_info *sh_info;
- struct skb_shared_info *new_skb_info;
- unsigned int data_len;
-
- sh_info = skb_shinfo(lro_skb);
- new_skb_info = skb_shinfo(new_skb);
-
- /* copy frags into the last skb */
- memcpy(sh_info->frags + sh_info->nr_frags,
- new_skb_info->frags,
- new_skb_info->nr_frags * sizeof(skb_frag_t));
-
- /* copy size data over */
- sh_info->nr_frags += new_skb_info->nr_frags;
- data_len = IGB_CB(new_skb)->mss;
- lro_skb->len += data_len;
- lro_skb->data_len += data_len;
- lro_skb->truesize += data_len;
-
- /* wipe record of data from new_skb */
- new_skb_info->nr_frags = 0;
- new_skb->len = new_skb->data_len = 0;
- dev_kfree_skb_any(new_skb);
-}
-
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-/**
- * igb_lro_receive - if able, queue skb into lro chain
- * @q_vector: structure containing interrupt and ring information
- * @new_skb: pointer to current skb being checked
- *
- * Checks whether the skb given is eligible for LRO and if that's
- * fine chains it to the existing lro_skb based on flowid. If an LRO for
- * the flow doesn't exist create one.
- **/
-static void igb_lro_receive(struct igb_q_vector *q_vector,
- struct sk_buff *new_skb)
-{
- struct sk_buff *lro_skb;
- struct igb_lro_list *lrolist = &q_vector->lrolist;
- struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
- __be32 saddr = lroh->iph.saddr;
- __be32 daddr = lroh->iph.daddr;
- __be32 tcp_ports = *(__be32 *)&lroh->th;
- u16 data_len;
-#ifdef HAVE_VLAN_RX_REGISTER
- u16 vid = IGB_CB(new_skb)->vid;
-#else
- u16 vid = new_skb->vlan_tci;
-#endif
-
- igb_lro_header_ok(new_skb);
-
- /*
- * we have a packet that might be eligible for LRO,
- * so see if it matches anything we might expect
- */
- skb_queue_walk(&lrolist->active, lro_skb) {
- if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
- igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
- igb_lro_hdr(lro_skb)->iph.daddr != daddr)
- continue;
-
-#ifdef HAVE_VLAN_RX_REGISTER
- if (IGB_CB(lro_skb)->vid != vid)
-#else
- if (lro_skb->vlan_tci != vid)
-#endif
- continue;
-
- /* out of order packet */
- if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
- igb_lro_flush(q_vector, lro_skb);
- IGB_CB(new_skb)->mss = 0;
- break;
- }
-
- /* TCP timestamp options have changed */
- if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
- igb_lro_flush(q_vector, lro_skb);
- break;
- }
-
- /* make sure timestamp values are increasing */
- if (IGB_CB(lro_skb)->tsecr &&
- IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
- igb_lro_flush(q_vector, lro_skb);
- IGB_CB(new_skb)->mss = 0;
- break;
- }
-
- data_len = IGB_CB(new_skb)->mss;
-
- /* Check for all of the above below
- * malformed header
- * no tcp data
- * resultant packet would be too large
- * new skb is larger than our current mss
- * data would remain in header
- * we would consume more frags then the sk_buff contains
- * ack sequence numbers changed
- * window size has changed
- */
- if (data_len == 0 ||
- data_len > IGB_CB(lro_skb)->mss ||
- data_len > IGB_CB(lro_skb)->free ||
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- data_len != new_skb->data_len ||
- skb_shinfo(new_skb)->nr_frags >=
- (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) ||
-#endif
- igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
- igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
- igb_lro_flush(q_vector, lro_skb);
- break;
- }
-
- /* Remove IP and TCP header*/
- skb_pull(new_skb, new_skb->len - data_len);
-
- /* update timestamp and timestamp echo response */
- IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
- IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
-
- /* update sequence and free space */
- IGB_CB(lro_skb)->next_seq += data_len;
- IGB_CB(lro_skb)->free -= data_len;
-
- /* update append_cnt */
- IGB_CB(lro_skb)->append_cnt++;
-
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- /* if header is empty pull pages into current skb */
- igb_merge_frags(lro_skb, new_skb);
-#else
- /* chain this new skb in frag_list */
- igb_add_active_tail(lro_skb, new_skb);
-#endif
-
- if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh ||
- skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) {
- igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
- igb_lro_flush(q_vector, lro_skb);
- }
-
- lrolist->stats.coal++;
- return;
- }
-
- if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
- /* if we are at capacity flush the tail */
- if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
- lro_skb = skb_peek_tail(&lrolist->active);
- if (lro_skb)
- igb_lro_flush(q_vector, lro_skb);
- }
-
- /* update sequence and free space */
- IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
- IGB_CB(new_skb)->free = 65521 - new_skb->len;
-
- /* .. and insert at the front of the active list */
- __skb_queue_head(&lrolist->active, new_skb);
-
- lrolist->stats.coal++;
- return;
- }
-
- /* packet not handled by any of the above, pass it to the stack */
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_receive_skb(q_vector, new_skb);
-#else
- napi_gro_receive(&q_vector->napi, new_skb);
-#endif
-}
-
-#endif /* IGB_NO_LRO */
-/**
- * igb_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, timestamp, protocol, and
- * other fields within the skb.
- **/
-static void igb_process_skb_fields(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct net_device *dev = rx_ring->netdev;
- __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
-
-#ifdef NETIF_F_RXHASH
- igb_rx_hash(rx_ring, rx_desc, skb);
-
-#endif
- igb_rx_checksum(rx_ring, rx_desc, skb);
-
- /* update packet type stats */
- if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))
- rx_ring->rx_stats.ipv4_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX))
- rx_ring->rx_stats.ipv4e_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6))
- rx_ring->rx_stats.ipv6_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX))
- rx_ring->rx_stats.ipv6e_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP))
- rx_ring->rx_stats.tcp_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP))
- rx_ring->rx_stats.udp_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP))
- rx_ring->rx_stats.sctp_packets++;
- else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS))
- rx_ring->rx_stats.nfs_packets++;
-
-#ifdef HAVE_PTP_1588_CLOCK
- igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
-#endif /* HAVE_PTP_1588_CLOCK */
-
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-#else
- if ((dev->features & NETIF_F_HW_VLAN_RX) &&
-#endif
- igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
- u16 vid = 0;
- if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
- test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
- vid = be16_to_cpu(rx_desc->wb.upper.vlan);
- else
- vid = le16_to_cpu(rx_desc->wb.upper.vlan);
-#ifdef HAVE_VLAN_RX_REGISTER
- IGB_CB(skb)->vid = vid;
- } else {
- IGB_CB(skb)->vid = 0;
-#else
-
-#ifdef HAVE_VLAN_PROTOCOL
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
-#else
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
-#endif
-
-
-#endif
- }
-
- skb_record_rx_queue(skb, rx_ring->queue_index);
-
- skb->protocol = eth_type_trans(skb, dev);
-}
-
-/**
- * igb_is_non_eop - process handling of non-EOP buffers
- * @rx_ring: Rx ring being processed
- * @rx_desc: Rx descriptor for current buffer
- *
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
- **/
-static bool igb_is_non_eop(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc)
-{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(IGB_RX_DESC(rx_ring, ntc));
-
- if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
- return false;
-
- return true;
-}
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
-/* igb_clean_rx_irq -- * legacy */
-static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
-{
- struct igb_ring *rx_ring = q_vector->rx.ring;
- unsigned int total_bytes = 0, total_packets = 0;
- u16 cleaned_count = igb_desc_unused(rx_ring);
-
- do {
- struct igb_rx_buffer *rx_buffer;
- union e1000_adv_rx_desc *rx_desc;
- struct sk_buff *skb;
- u16 ntc;
-
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
- igb_alloc_rx_buffers(rx_ring, cleaned_count);
- cleaned_count = 0;
- }
-
- ntc = rx_ring->next_to_clean;
- rx_desc = IGB_RX_DESC(rx_ring, ntc);
- rx_buffer = &rx_ring->rx_buffer_info[ntc];
-
- if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
- break;
-
- /*
- * This memory barrier is needed to keep us from reading
- * any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
- */
- rmb();
-
- skb = rx_buffer->skb;
-
- prefetch(skb->data);
-
- /* pull the header of the skb in */
- __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
-
- /* clear skb reference in buffer info structure */
- rx_buffer->skb = NULL;
-
- cleaned_count++;
-
- BUG_ON(igb_is_non_eop(rx_ring, rx_desc));
-
- dma_unmap_single(rx_ring->dev, rx_buffer->dma,
- rx_ring->rx_buffer_len,
- DMA_FROM_DEVICE);
- rx_buffer->dma = 0;
-
- if (igb_test_staterr(rx_desc,
- E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
- dev_kfree_skb_any(skb);
- continue;
- }
-
- total_bytes += skb->len;
-
- /* populate checksum, timestamp, VLAN, and protocol */
- igb_process_skb_fields(rx_ring, rx_desc, skb);
-
-#ifndef IGB_NO_LRO
- if (igb_can_lro(rx_ring, rx_desc, skb))
- igb_lro_receive(q_vector, skb);
- else
-#endif
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_receive_skb(q_vector, skb);
-#else
- napi_gro_receive(&q_vector->napi, skb);
-#endif
-
-#ifndef NETIF_F_GRO
- netdev_ring(rx_ring)->last_rx = jiffies;
-
-#endif
- /* update budget accounting */
- total_packets++;
- } while (likely(total_packets < budget));
-
- rx_ring->rx_stats.packets += total_packets;
- rx_ring->rx_stats.bytes += total_bytes;
- q_vector->rx.total_packets += total_packets;
- q_vector->rx.total_bytes += total_bytes;
-
- if (cleaned_count)
- igb_alloc_rx_buffers(rx_ring, cleaned_count);
-
-#ifndef IGB_NO_LRO
- igb_lro_flush_all(q_vector);
-
-#endif /* IGB_NO_LRO */
- return total_packets < budget;
-}
-#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-/**
- * igb_get_headlen - determine size of header for LRO/GRO
- * @data: pointer to the start of the headers
- * @max_len: total length of section to find headers in
- *
- * This function is meant to determine the length of headers that will
- * be recognized by hardware for LRO, and GRO offloads. The main
- * motivation of doing this is to only perform one pull for IPv4 TCP
- * packets so that we can do basic things like calculating the gso_size
- * based on the average data per packet.
- **/
-static unsigned int igb_get_headlen(unsigned char *data,
- unsigned int max_len)
-{
- union {
- unsigned char *network;
- /* l2 headers */
- struct ethhdr *eth;
- struct vlan_hdr *vlan;
- /* l3 headers */
- struct iphdr *ipv4;
- struct ipv6hdr *ipv6;
- } hdr;
- __be16 protocol;
- u8 nexthdr = 0; /* default to not TCP */
- u8 hlen;
-
- /* this should never happen, but better safe than sorry */
- if (max_len < ETH_HLEN)
- return max_len;
-
- /* initialize network frame pointer */
- hdr.network = data;
-
- /* set first protocol and move network header forward */
- protocol = hdr.eth->h_proto;
- hdr.network += ETH_HLEN;
-
- /* handle any vlan tag if present */
- if (protocol == __constant_htons(ETH_P_8021Q)) {
- if ((hdr.network - data) > (max_len - VLAN_HLEN))
- return max_len;
-
- protocol = hdr.vlan->h_vlan_encapsulated_proto;
- hdr.network += VLAN_HLEN;
- }
-
- /* handle L3 protocols */
- if (protocol == __constant_htons(ETH_P_IP)) {
- if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
- return max_len;
-
- /* access ihl as a u8 to avoid unaligned access on ia64 */
- hlen = (hdr.network[0] & 0x0F) << 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct iphdr))
- return hdr.network - data;
-
- /* record next protocol if header is present */
- if (!(hdr.ipv4->frag_off & htons(IP_OFFSET)))
- nexthdr = hdr.ipv4->protocol;
-#ifdef NETIF_F_TSO6
- } else if (protocol == __constant_htons(ETH_P_IPV6)) {
- if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
- return max_len;
-
- /* record next protocol */
- nexthdr = hdr.ipv6->nexthdr;
- hlen = sizeof(struct ipv6hdr);
-#endif /* NETIF_F_TSO6 */
- } else {
- return hdr.network - data;
- }
-
- /* relocate pointer to start of L4 header */
- hdr.network += hlen;
-
- /* finally sort out TCP */
- if (nexthdr == IPPROTO_TCP) {
- if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
- return max_len;
-
- /* access doff as a u8 to avoid unaligned access on ia64 */
- hlen = (hdr.network[12] & 0xF0) >> 2;
-
- /* verify hlen meets minimum size requirements */
- if (hlen < sizeof(struct tcphdr))
- return hdr.network - data;
-
- hdr.network += hlen;
- } else if (nexthdr == IPPROTO_UDP) {
- if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
- return max_len;
-
- hdr.network += sizeof(struct udphdr);
- }
-
- /*
- * If everything has gone correctly hdr.network should be the
- * data section of the packet and will be the end of the header.
- * If not then it probably represents the end of the last recognized
- * header.
- */
- if ((hdr.network - data) < max_len)
- return hdr.network - data;
- else
- return max_len;
-}
-
-/**
- * igb_pull_tail - igb specific version of skb_pull_tail
- * @rx_ring: rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being adjusted
- *
- * This function is an igb specific version of __pskb_pull_tail. The
- * main difference between this version and the original function is that
- * this function can make several assumptions about the state of things
- * that allow for significant optimizations versus the standard function.
- * As a result we can do things like drop a frag and maintain an accurate
- * truesize for the skb.
- */
-static void igb_pull_tail(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
- unsigned char *va;
- unsigned int pull_len;
-
- /*
- * it is valid to use page_address instead of kmap since we are
- * working with pages allocated out of the lomem pool per
- * alloc_page(GFP_ATOMIC)
- */
- va = skb_frag_address(frag);
-
-#ifdef HAVE_PTP_1588_CLOCK
- if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
- /* retrieve timestamp from buffer */
- igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
-
- /* update pointers to remove timestamp header */
- skb_frag_size_sub(frag, IGB_TS_HDR_LEN);
- frag->page_offset += IGB_TS_HDR_LEN;
- skb->data_len -= IGB_TS_HDR_LEN;
- skb->len -= IGB_TS_HDR_LEN;
-
- /* move va to start of packet data */
- va += IGB_TS_HDR_LEN;
- }
-#endif /* HAVE_PTP_1588_CLOCK */
-
- /*
- * we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN);
-
- /* align pull length to size of long to optimize memcpy performance */
- skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
-
- /* update all of the pointers */
- skb_frag_size_sub(frag, pull_len);
- frag->page_offset += pull_len;
- skb->data_len -= pull_len;
- skb->tail += pull_len;
-}
-
-/**
- * igb_cleanup_headers - Correct corrupted or empty headers
- * @rx_ring: rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being fixed
- *
- * Address the case where we are pulling data in on pages only
- * and as such no data is present in the skb header.
- *
- * In addition if skb is not at least 60 bytes we need to pad it so that
- * it is large enough to qualify as a valid Ethernet frame.
- *
- * Returns true if an error was encountered and skb was freed.
- **/
-static bool igb_cleanup_headers(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
-{
-
- if (unlikely((igb_test_staterr(rx_desc,
- E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
- struct net_device *netdev = rx_ring->netdev;
- if (!(netdev->features & NETIF_F_RXALL)) {
- dev_kfree_skb_any(skb);
- return true;
- }
- }
-
- /* place header in linear portion of buffer */
- if (skb_is_nonlinear(skb))
- igb_pull_tail(rx_ring, rx_desc, skb);
-
- /* if skb_pad returns an error the skb was freed */
- if (unlikely(skb->len < 60)) {
- int pad_len = 60 - skb->len;
-
- if (skb_pad(skb, pad_len))
- return true;
- __skb_put(skb, pad_len);
- }
-
- return false;
-}
-
-/* igb_clean_rx_irq -- * packet split */
-static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
-{
- struct igb_ring *rx_ring = q_vector->rx.ring;
- struct sk_buff *skb = rx_ring->skb;
- unsigned int total_bytes = 0, total_packets = 0;
- u16 cleaned_count = igb_desc_unused(rx_ring);
-
- do {
- union e1000_adv_rx_desc *rx_desc;
-
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
- igb_alloc_rx_buffers(rx_ring, cleaned_count);
- cleaned_count = 0;
- }
-
- rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
- break;
-
- /*
- * This memory barrier is needed to keep us from reading
- * any other fields out of the rx_desc until we know the
- * RXD_STAT_DD bit is set
- */
- rmb();
-
- /* retrieve a buffer from the ring */
- skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
-
- /* exit if we failed to retrieve a buffer */
- if (!skb)
- break;
-
- cleaned_count++;
-
- /* fetch next buffer in frame if non-eop */
- if (igb_is_non_eop(rx_ring, rx_desc))
- continue;
-
- /* verify the packet layout is correct */
- if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
- skb = NULL;
- continue;
- }
-
- /* probably a little skewed due to removing CRC */
- total_bytes += skb->len;
-
- /* populate checksum, timestamp, VLAN, and protocol */
- igb_process_skb_fields(rx_ring, rx_desc, skb);
-
-#ifndef IGB_NO_LRO
- if (igb_can_lro(rx_ring, rx_desc, skb))
- igb_lro_receive(q_vector, skb);
- else
-#endif
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_receive_skb(q_vector, skb);
-#else
- napi_gro_receive(&q_vector->napi, skb);
-#endif
-#ifndef NETIF_F_GRO
-
- netdev_ring(rx_ring)->last_rx = jiffies;
-#endif
-
- /* reset skb pointer */
- skb = NULL;
-
- /* update budget accounting */
- total_packets++;
- } while (likely(total_packets < budget));
-
- /* place incomplete frames back on ring for completion */
- rx_ring->skb = skb;
-
- rx_ring->rx_stats.packets += total_packets;
- rx_ring->rx_stats.bytes += total_bytes;
- q_vector->rx.total_packets += total_packets;
- q_vector->rx.total_bytes += total_bytes;
-
- if (cleaned_count)
- igb_alloc_rx_buffers(rx_ring, cleaned_count);
-
-#ifndef IGB_NO_LRO
- igb_lro_flush_all(q_vector);
-
-#endif /* IGB_NO_LRO */
- return total_packets < budget;
-}
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
-static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
- struct igb_rx_buffer *bi)
-{
- struct sk_buff *skb = bi->skb;
- dma_addr_t dma = bi->dma;
-
- if (dma)
- return true;
-
- if (likely(!skb)) {
- skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
- rx_ring->rx_buffer_len);
- bi->skb = skb;
- if (!skb) {
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- /* initialize skb for ring */
- skb_record_rx_queue(skb, ring_queue_index(rx_ring));
- }
-
- dma = dma_map_single(rx_ring->dev, skb->data,
- rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
-
- /* if mapping failed free memory back to system since
- * there isn't much point in holding memory we can't use
- */
- if (dma_mapping_error(rx_ring->dev, dma)) {
- dev_kfree_skb_any(skb);
- bi->skb = NULL;
-
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- bi->dma = dma;
- return true;
-}
-
-#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
- struct igb_rx_buffer *bi)
-{
- struct page *page = bi->page;
- dma_addr_t dma;
-
- /* since we are recycling buffers we should seldom need to alloc */
- if (likely(page))
- return true;
-
- /* alloc new page for storage */
- page = alloc_page(GFP_ATOMIC | __GFP_COLD);
- if (unlikely(!page)) {
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- /* map page for use */
- dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-
- /*
- * if mapping failed free memory back to system since
- * there isn't much point in holding memory we can't use
- */
- if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
-
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- bi->dma = dma;
- bi->page = page;
- bi->page_offset = 0;
-
- return true;
-}
-
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
-/**
- * igb_alloc_rx_buffers - Replace used receive buffers; packet split
- * @adapter: address of board private structure
- **/
-void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
-{
- union e1000_adv_rx_desc *rx_desc;
- struct igb_rx_buffer *bi;
- u16 i = rx_ring->next_to_use;
-
- /* nothing to do */
- if (!cleaned_count)
- return;
-
- rx_desc = IGB_RX_DESC(rx_ring, i);
- bi = &rx_ring->rx_buffer_info[i];
- i -= rx_ring->count;
-
- do {
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- if (!igb_alloc_mapped_skb(rx_ring, bi))
-#else
- if (!igb_alloc_mapped_page(rx_ring, bi))
-#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
- break;
-
- /*
- * Refresh the desc even if buffer_addrs didn't change
- * because each write-back erases this info.
- */
-#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
-#else
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
-#endif
-
- rx_desc++;
- bi++;
- i++;
- if (unlikely(!i)) {
- rx_desc = IGB_RX_DESC(rx_ring, 0);
- bi = rx_ring->rx_buffer_info;
- i -= rx_ring->count;
- }
-
- /* clear the hdr_addr for the next_to_use descriptor */
- rx_desc->read.hdr_addr = 0;
-
- cleaned_count--;
- } while (cleaned_count);
-
- i += rx_ring->count;
-
- if (rx_ring->next_to_use != i) {
- /* record the next descriptor to use */
- rx_ring->next_to_use = i;
-
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = i;
-
-#endif
- /*
- * Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
- writel(i, rx_ring->tail);
- }
-}
-
-#ifdef SIOCGMIIPHY
-/**
- * igb_mii_ioctl -
- * @netdev:
- * @ifreq:
- * @cmd:
- **/
-static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct mii_ioctl_data *data = if_mii(ifr);
-
- if (adapter->hw.phy.media_type != e1000_media_type_copper)
- return -EOPNOTSUPP;
-
- switch (cmd) {
- case SIOCGMIIPHY:
- data->phy_id = adapter->hw.phy.addr;
- break;
- case SIOCGMIIREG:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
- &data->val_out))
- return -EIO;
- break;
- case SIOCSMIIREG:
- default:
- return -EOPNOTSUPP;
- }
- return E1000_SUCCESS;
-}
-
-#endif
-/**
- * igb_ioctl -
- * @netdev:
- * @ifreq:
- * @cmd:
- **/
-static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
-{
- switch (cmd) {
-#ifdef SIOCGMIIPHY
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCSMIIREG:
- return igb_mii_ioctl(netdev, ifr, cmd);
-#endif
-#ifdef HAVE_PTP_1588_CLOCK
- case SIOCSHWTSTAMP:
- return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
-#endif /* HAVE_PTP_1588_CLOCK */
-#ifdef ETHTOOL_OPS_COMPAT
- case SIOCETHTOOL:
- return ethtool_ioctl(ifr);
-#endif
- default:
- return -EOPNOTSUPP;
- }
-}
-
-s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
-{
- struct igb_adapter *adapter = hw->back;
- u16 cap_offset;
-
- cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
- if (!cap_offset)
- return -E1000_ERR_CONFIG;
-
- pci_read_config_word(adapter->pdev, cap_offset + reg, value);
-
- return E1000_SUCCESS;
-}
-
-s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
-{
- struct igb_adapter *adapter = hw->back;
- u16 cap_offset;
-
- cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
- if (!cap_offset)
- return -E1000_ERR_CONFIG;
-
- pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
-
- return E1000_SUCCESS;
-}
-
-#ifdef HAVE_VLAN_RX_REGISTER
-static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
-#else
-void igb_vlan_mode(struct net_device *netdev, u32 features)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl, rctl;
- int i;
-#ifdef HAVE_VLAN_RX_REGISTER
- bool enable = !!vlgrp;
-
- igb_irq_disable(adapter);
-
- adapter->vlgrp = vlgrp;
-
- if (!test_bit(__IGB_DOWN, &adapter->state))
- igb_irq_enable(adapter);
-#else
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
-#else
- bool enable = !!(features & NETIF_F_HW_VLAN_RX);
-#endif
-#endif
-
- if (enable) {
- /* enable VLAN tag insert/strip */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl |= E1000_CTRL_VME;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- /* Disable CFI check */
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- rctl &= ~E1000_RCTL_CFIEN;
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
- } else {
- /* disable VLAN tag insert/strip */
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- ctrl &= ~E1000_CTRL_VME;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
- }
-
-#ifndef CONFIG_IGB_VMDQ_NETDEV
- for (i = 0; i < adapter->vmdq_pools; i++) {
- igb_set_vf_vlan_strip(adapter,
- adapter->vfs_allocated_count + i,
- enable);
- }
-
-#else
- igb_set_vf_vlan_strip(adapter,
- adapter->vfs_allocated_count,
- enable);
-
- for (i = 1; i < adapter->vmdq_pools; i++) {
-#ifdef HAVE_VLAN_RX_REGISTER
- struct igb_vmdq_adapter *vadapter;
- vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
- enable = !!vadapter->vlgrp;
-#else
- struct net_device *vnetdev;
- vnetdev = adapter->vmdq_netdev[i-1];
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX);
-#else
- enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
-#endif
-#endif
- igb_set_vf_vlan_strip(adapter,
- adapter->vfs_allocated_count + i,
- enable);
- }
-
-#endif
- igb_rlpml_set(adapter);
-}
-
-#ifdef HAVE_VLAN_PROTOCOL
-static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
-#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
-static int igb_vlan_rx_add_vid(struct net_device *netdev,
- __always_unused __be16 proto, u16 vid)
-#else
-static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
-#endif
-#else
-static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- int pf_id = adapter->vfs_allocated_count;
-
- /* attempt to add filter to vlvf array */
- igb_vlvf_set(adapter, vid, TRUE, pf_id);
-
- /* add the filter since PF can receive vlans w/o entry in vlvf */
- igb_vfta_set(adapter, vid, TRUE);
-#ifndef HAVE_NETDEV_VLAN_FEATURES
-
- /* Copy feature flags from netdev to the vlan netdev for this vid.
- * This allows things like TSO to bubble down to our vlan device.
- * There is no need to update netdev for vlan 0 (DCB), since it
- * wouldn't has v_netdev.
- */
- if (adapter->vlgrp) {
- struct vlan_group *vlgrp = adapter->vlgrp;
- struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
- if (v_netdev) {
- v_netdev->features |= netdev->features;
- vlan_group_set_device(vlgrp, vid, v_netdev);
- }
- }
-#endif
-#ifndef HAVE_VLAN_RX_REGISTER
-
- set_bit(vid, adapter->active_vlans);
-#endif
-#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
- return 0;
-#endif
-}
-
-#ifdef HAVE_VLAN_PROTOCOL
-static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
-#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
-static int igb_vlan_rx_kill_vid(struct net_device *netdev,
- __always_unused __be16 proto, u16 vid)
-#else
-static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
-#endif
-#else
-static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
-#endif
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- int pf_id = adapter->vfs_allocated_count;
- s32 err;
-
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_irq_disable(adapter);
-
- vlan_group_set_device(adapter->vlgrp, vid, NULL);
-
- if (!test_bit(__IGB_DOWN, &adapter->state))
- igb_irq_enable(adapter);
-
-#endif /* HAVE_VLAN_RX_REGISTER */
- /* remove vlan from VLVF table array */
- err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
-
- /* if vid was not present in VLVF just remove it from table */
- if (err)
- igb_vfta_set(adapter, vid, FALSE);
-#ifndef HAVE_VLAN_RX_REGISTER
-
- clear_bit(vid, adapter->active_vlans);
-#endif
-#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
- return 0;
-#endif
-}
-
-static void igb_restore_vlan(struct igb_adapter *adapter)
-{
-#ifdef HAVE_VLAN_RX_REGISTER
- igb_vlan_mode(adapter->netdev, adapter->vlgrp);
-
- if (adapter->vlgrp) {
- u16 vid;
- for (vid = 0; vid < VLAN_N_VID; vid++) {
- if (!vlan_group_get_device(adapter->vlgrp, vid))
- continue;
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- igb_vlan_rx_add_vid(adapter->netdev,
- htons(ETH_P_8021Q), vid);
-#else
- igb_vlan_rx_add_vid(adapter->netdev, vid);
-#endif
- }
- }
-#else
- u16 vid;
-
- igb_vlan_mode(adapter->netdev, adapter->netdev->features);
-
- for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- igb_vlan_rx_add_vid(adapter->netdev,
- htons(ETH_P_8021Q), vid);
-#else
- igb_vlan_rx_add_vid(adapter->netdev, vid);
-#endif
-#endif
-}
-
-int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
-{
- struct pci_dev *pdev = adapter->pdev;
- struct e1000_mac_info *mac = &adapter->hw.mac;
-
- mac->autoneg = 0;
-
- /* SerDes device's does not support 10Mbps Full/duplex
- * and 100Mbps Half duplex
- */
- if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
- switch (spddplx) {
- case SPEED_10 + DUPLEX_HALF:
- case SPEED_10 + DUPLEX_FULL:
- case SPEED_100 + DUPLEX_HALF:
- dev_err(pci_dev_to_dev(pdev),
- "Unsupported Speed/Duplex configuration\n");
- return -EINVAL;
- default:
- break;
- }
- }
-
- switch (spddplx) {
- case SPEED_10 + DUPLEX_HALF:
- mac->forced_speed_duplex = ADVERTISE_10_HALF;
- break;
- case SPEED_10 + DUPLEX_FULL:
- mac->forced_speed_duplex = ADVERTISE_10_FULL;
- break;
- case SPEED_100 + DUPLEX_HALF:
- mac->forced_speed_duplex = ADVERTISE_100_HALF;
- break;
- case SPEED_100 + DUPLEX_FULL:
- mac->forced_speed_duplex = ADVERTISE_100_FULL;
- break;
- case SPEED_1000 + DUPLEX_FULL:
- mac->autoneg = 1;
- adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
- break;
- case SPEED_1000 + DUPLEX_HALF: /* not supported */
- default:
- dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n");
- return -EINVAL;
- }
-
- /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
- adapter->hw.phy.mdix = AUTO_ALL_MODES;
-
- return 0;
-}
-
-static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
- bool runtime)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 ctrl, rctl, status;
- u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
-#ifdef CONFIG_PM
- int retval = 0;
-#endif
-
- netif_device_detach(netdev);
-
- status = E1000_READ_REG(hw, E1000_STATUS);
- if (status & E1000_STATUS_LU)
- wufc &= ~E1000_WUFC_LNKC;
-
- if (netif_running(netdev))
- __igb_close(netdev, true);
-
- igb_clear_interrupt_scheme(adapter);
-
-#ifdef CONFIG_PM
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
-#endif
-
- if (wufc) {
- igb_setup_rctl(adapter);
- igb_set_rx_mode(netdev);
-
- /* turn on all-multi mode if wake on multicast is enabled */
- if (wufc & E1000_WUFC_MC) {
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- rctl |= E1000_RCTL_MPE;
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
- }
-
- ctrl = E1000_READ_REG(hw, E1000_CTRL);
- /* phy power management enable */
- #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
- ctrl |= E1000_CTRL_ADVD3WUC;
- E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
-
- /* Allow time for pending master requests to run */
- e1000_disable_pcie_master(hw);
-
- E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
- E1000_WRITE_REG(hw, E1000_WUFC, wufc);
- } else {
- E1000_WRITE_REG(hw, E1000_WUC, 0);
- E1000_WRITE_REG(hw, E1000_WUFC, 0);
- }
-
- *enable_wake = wufc || adapter->en_mng_pt;
- if (!*enable_wake)
- igb_power_down_link(adapter);
- else
- igb_power_up_link(adapter);
-
- /* Release control of h/w to f/w. If f/w is AMT enabled, this
- * would have already happened in close and is redundant. */
- igb_release_hw_control(adapter);
-
- pci_disable_device(pdev);
-
- return 0;
-}
-
-#ifdef CONFIG_PM
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
-static int igb_suspend(struct device *dev)
-#else
-static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
-{
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
- struct pci_dev *pdev = to_pci_dev(dev);
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
- int retval;
- bool wake;
-
- retval = __igb_shutdown(pdev, &wake, 0);
- if (retval)
- return retval;
-
- if (wake) {
- pci_prepare_to_sleep(pdev);
- } else {
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
- }
-
- return 0;
-}
-
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
-static int igb_resume(struct device *dev)
-#else
-static int igb_resume(struct pci_dev *pdev)
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
-{
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
- struct pci_dev *pdev = to_pci_dev(dev);
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- u32 err;
-
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- pci_save_state(pdev);
-
- err = pci_enable_device_mem(pdev);
- if (err) {
- dev_err(pci_dev_to_dev(pdev),
- "igb: Cannot enable PCI device from suspend\n");
- return err;
- }
- pci_set_master(pdev);
-
- pci_enable_wake(pdev, PCI_D3hot, 0);
- pci_enable_wake(pdev, PCI_D3cold, 0);
-
- if (igb_init_interrupt_scheme(adapter, true)) {
- dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
- return -ENOMEM;
- }
-
- igb_reset(adapter);
-
- /* let the f/w know that the h/w is now under the control of the
- * driver. */
- igb_get_hw_control(adapter);
-
- E1000_WRITE_REG(hw, E1000_WUS, ~0);
-
- if (netdev->flags & IFF_UP) {
- rtnl_lock();
- err = __igb_open(netdev, true);
- rtnl_unlock();
- if (err)
- return err;
- }
-
- netif_device_attach(netdev);
-
- return 0;
-}
-
-#ifdef CONFIG_PM_RUNTIME
-#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
-static int igb_runtime_idle(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (!igb_has_link(adapter))
- pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
-
- return -EBUSY;
-}
-
-static int igb_runtime_suspend(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- int retval;
- bool wake;
-
- retval = __igb_shutdown(pdev, &wake, 1);
- if (retval)
- return retval;
-
- if (wake) {
- pci_prepare_to_sleep(pdev);
- } else {
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
- }
-
- return 0;
-}
-
-static int igb_runtime_resume(struct device *dev)
-{
- return igb_resume(dev);
-}
-#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
-#endif /* CONFIG_PM_RUNTIME */
-#endif /* CONFIG_PM */
-
-#ifdef USE_REBOOT_NOTIFIER
-/* only want to do this for 2.4 kernels? */
-static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
- void *p)
-{
- struct pci_dev *pdev = NULL;
- bool wake;
-
- switch (event) {
- case SYS_DOWN:
- case SYS_HALT:
- case SYS_POWER_OFF:
- while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
- if (pci_dev_driver(pdev) == &igb_driver) {
- __igb_shutdown(pdev, &wake, 0);
- if (event == SYS_POWER_OFF) {
- pci_wake_from_d3(pdev, wake);
- pci_set_power_state(pdev, PCI_D3hot);
- }
- }
- }
- }
- return NOTIFY_DONE;
-}
-#else
-static void igb_shutdown(struct pci_dev *pdev)
-{
- bool wake = false;
-
- __igb_shutdown(pdev, &wake, 0);
-
- if (system_state == SYSTEM_POWER_OFF) {
- pci_wake_from_d3(pdev, wake);
- pci_set_power_state(pdev, PCI_D3hot);
- }
-}
-#endif /* USE_REBOOT_NOTIFIER */
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-/*
- * Polling 'interrupt' - used by things like netconsole to send skbs
- * without having to re-enable interrupts. It's not called while
- * the interrupt routine is executing.
- */
-static void igb_netpoll(struct net_device *netdev)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- struct igb_q_vector *q_vector;
- int i;
-
- for (i = 0; i < adapter->num_q_vectors; i++) {
- q_vector = adapter->q_vector[i];
- if (adapter->msix_entries)
- E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
- else
- igb_irq_disable(adapter);
- napi_schedule(&q_vector->napi);
- }
-}
-#endif /* CONFIG_NET_POLL_CONTROLLER */
-
-#ifdef HAVE_PCI_ERS
-#define E1000_DEV_ID_82576_VF 0x10CA
-/**
- * igb_io_error_detected - called when PCI error is detected
- * @pdev: Pointer to PCI device
- * @state: The current pci connection state
- *
- * This function is called after a PCI bus error affecting
- * this device has been detected.
- */
-static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
-
-#ifdef CONFIG_PCI_IOV__UNUSED
- struct pci_dev *bdev, *vfdev;
- u32 dw0, dw1, dw2, dw3;
- int vf, pos;
- u16 req_id, pf_func;
-
- if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
- goto skip_bad_vf_detection;
-
- bdev = pdev->bus->self;
- while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
- bdev = bdev->bus->self;
-
- if (!bdev)
- goto skip_bad_vf_detection;
-
- pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
- if (!pos)
- goto skip_bad_vf_detection;
-
- pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
- pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
- pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
- pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
-
- req_id = dw1 >> 16;
- /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
- if (!(req_id & 0x0080))
- goto skip_bad_vf_detection;
-
- pf_func = req_id & 0x01;
- if ((pf_func & 1) == (pdev->devfn & 1)) {
-
- vf = (req_id & 0x7F) >> 1;
- dev_err(pci_dev_to_dev(pdev),
- "VF %d has caused a PCIe error\n", vf);
- dev_err(pci_dev_to_dev(pdev),
- "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
- "%8.8x\tdw3: %8.8x\n",
- dw0, dw1, dw2, dw3);
-
- /* Find the pci device of the offending VF */
- vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
- E1000_DEV_ID_82576_VF, NULL);
- while (vfdev) {
- if (vfdev->devfn == (req_id & 0xFF))
- break;
- vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
- E1000_DEV_ID_82576_VF, vfdev);
- }
- /*
- * There's a slim chance the VF could have been hot plugged,
- * so if it is no longer present we don't need to issue the
- * VFLR. Just clean up the AER in that case.
- */
- if (vfdev) {
- dev_err(pci_dev_to_dev(pdev),
- "Issuing VFLR to VF %d\n", vf);
- pci_write_config_dword(vfdev, 0xA8, 0x00008000);
- }
-
- pci_cleanup_aer_uncorrect_error_status(pdev);
- }
-
- /*
- * Even though the error may have occurred on the other port
- * we still need to increment the vf error reference count for
- * both ports because the I/O resume function will be called
- * for both of them.
- */
- adapter->vferr_refcount++;
-
- return PCI_ERS_RESULT_RECOVERED;
-
-skip_bad_vf_detection:
-#endif /* CONFIG_PCI_IOV */
-
- netif_device_detach(netdev);
-
- if (state == pci_channel_io_perm_failure)
- return PCI_ERS_RESULT_DISCONNECT;
-
- if (netif_running(netdev))
- igb_down(adapter);
- pci_disable_device(pdev);
-
- /* Request a slot slot reset. */
- return PCI_ERS_RESULT_NEED_RESET;
-}
-
-/**
- * igb_io_slot_reset - called after the pci bus has been reset.
- * @pdev: Pointer to PCI device
- *
- * Restart the card from scratch, as if from a cold-boot. Implementation
- * resembles the first-half of the igb_resume routine.
- */
-static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- pci_ers_result_t result;
-
- if (pci_enable_device_mem(pdev)) {
- dev_err(pci_dev_to_dev(pdev),
- "Cannot re-enable PCI device after reset.\n");
- result = PCI_ERS_RESULT_DISCONNECT;
- } else {
- pci_set_master(pdev);
- pci_restore_state(pdev);
- pci_save_state(pdev);
-
- pci_enable_wake(pdev, PCI_D3hot, 0);
- pci_enable_wake(pdev, PCI_D3cold, 0);
-
- schedule_work(&adapter->reset_task);
- E1000_WRITE_REG(hw, E1000_WUS, ~0);
- result = PCI_ERS_RESULT_RECOVERED;
- }
-
- pci_cleanup_aer_uncorrect_error_status(pdev);
-
- return result;
-}
-
-/**
- * igb_io_resume - called when traffic can start flowing again.
- * @pdev: Pointer to PCI device
- *
- * This callback is called when the error recovery driver tells us that
- * its OK to resume normal operation. Implementation resembles the
- * second-half of the igb_resume routine.
- */
-static void igb_io_resume(struct pci_dev *pdev)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct igb_adapter *adapter = netdev_priv(netdev);
-
- if (adapter->vferr_refcount) {
- dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
- adapter->vferr_refcount--;
- return;
- }
-
- if (netif_running(netdev)) {
- if (igb_up(adapter)) {
- dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
- return;
- }
- }
-
- netif_device_attach(netdev);
-
- /* let the f/w know that the h/w is now under the control of the
- * driver. */
- igb_get_hw_control(adapter);
-}
-
-#endif /* HAVE_PCI_ERS */
-
-int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
-{
- struct e1000_hw *hw = &adapter->hw;
- int i;
-
- if (is_zero_ether_addr(addr))
- return 0;
-
- for (i = 0; i < hw->mac.rar_entry_count; i++) {
- if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
- continue;
- adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
- IGB_MAC_STATE_IN_USE);
- memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
- adapter->mac_table[i].queue = queue;
- igb_sync_mac_table(adapter);
- return 0;
- }
- return -ENOMEM;
-}
-int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
-{
- /* search table for addr, if found, set to 0 and sync */
- int i;
- struct e1000_hw *hw = &adapter->hw;
-
- if (is_zero_ether_addr(addr))
- return 0;
- for (i = 0; i < hw->mac.rar_entry_count; i++) {
- if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
- adapter->mac_table[i].queue == queue) {
- adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
- adapter->mac_table[i].queue = 0;
- igb_sync_mac_table(adapter);
- return 0;
- }
- }
- return -ENOMEM;
-}
-static int igb_set_vf_mac(struct igb_adapter *adapter,
- int vf, unsigned char *mac_addr)
-{
- igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
- memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
-
- igb_add_mac_filter(adapter, mac_addr, vf);
-
- return 0;
-}
-
-#ifdef IFLA_VF_MAX
-static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
- return -EINVAL;
- adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
- dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
- dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
- " change effective.\n");
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
- " but the PF device is not up.\n");
- dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
- " attempting to use the VF device.\n");
- }
- return igb_set_vf_mac(adapter, vf, mac);
-}
-
-static int igb_link_mbps(int internal_link_speed)
-{
- switch (internal_link_speed) {
- case SPEED_100:
- return 100;
- case SPEED_1000:
- return 1000;
- case SPEED_2500:
- return 2500;
- default:
- return 0;
- }
-}
-
-static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
- int link_speed)
-{
- int rf_dec, rf_int;
- u32 bcnrc_val;
-
- if (tx_rate != 0) {
- /* Calculate the rate factor values to set */
- rf_int = link_speed / tx_rate;
- rf_dec = (link_speed - (rf_int * tx_rate));
- rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
-
- bcnrc_val = E1000_RTTBCNRC_RS_ENA;
- bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
- E1000_RTTBCNRC_RF_INT_MASK);
- bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
- } else {
- bcnrc_val = 0;
- }
-
- E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
- /*
- * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
- * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
- */
- E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
- E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
-}
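/* A minimal standalone sketch (not from the original driver) of the
 * fixed-point arithmetic used above for RTTBCNRC: the register holds
 * link_speed / tx_rate with the integer part above the shift and the
 * remainder scaled by 1 << shift.  The 14-bit shift below is an
 * assumption standing in for E1000_RTTBCNRC_RF_INT_SHIFT.
 */
#include <stdio.h>
#include <stdint.h>

#define RF_INT_SHIFT 14 /* assumed shift width */

static uint32_t example_rate_factor(uint32_t link_speed, uint32_t tx_rate)
{
	uint32_t rf_int = link_speed / tx_rate;
	uint32_t rf_dec = link_speed - rf_int * tx_rate;

	rf_dec = (rf_dec << RF_INT_SHIFT) / tx_rate;
	return (rf_int << RF_INT_SHIFT) | rf_dec;
}

int main(void)
{
	/* 1000 Mb/s link capped at 300 Mb/s: factor ~= 3.333 */
	uint32_t v = example_rate_factor(1000, 300);

	printf("rf_int=%u rf_dec=%u\n",
	       v >> RF_INT_SHIFT, v & ((1u << RF_INT_SHIFT) - 1));
	return 0;
}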
-
-static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
-{
- int actual_link_speed, i;
- bool reset_rate = false;
-
- /* VF TX rate limit was not set */
- if ((adapter->vf_rate_link_speed == 0) ||
- (adapter->hw.mac.type != e1000_82576))
- return;
-
- actual_link_speed = igb_link_mbps(adapter->link_speed);
- if (actual_link_speed != adapter->vf_rate_link_speed) {
- reset_rate = true;
- adapter->vf_rate_link_speed = 0;
- dev_info(&adapter->pdev->dev,
- "Link speed has been changed. VF Transmit rate is disabled\n");
- }
-
- for (i = 0; i < adapter->vfs_allocated_count; i++) {
- if (reset_rate)
- adapter->vf_data[i].tx_rate = 0;
-
- igb_set_vf_rate_limit(&adapter->hw, i,
- adapter->vf_data[i].tx_rate, actual_link_speed);
- }
-}
-
-#ifdef HAVE_VF_MIN_MAX_TXRATE
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
- int tx_rate)
-#else /* HAVE_VF_MIN_MAX_TXRATE */
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
-#endif /* HAVE_VF_MIN_MAX_TXRATE */
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- int actual_link_speed;
-
- if (hw->mac.type != e1000_82576)
- return -EOPNOTSUPP;
-
-#ifdef HAVE_VF_MIN_MAX_TXRATE
- if (min_tx_rate)
- return -EINVAL;
-#endif /* HAVE_VF_MIN_MAX_TXRATE */
-
- actual_link_speed = igb_link_mbps(adapter->link_speed);
- if ((vf >= adapter->vfs_allocated_count) ||
- (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
- (tx_rate < 0) || (tx_rate > actual_link_speed))
- return -EINVAL;
-
- adapter->vf_rate_link_speed = actual_link_speed;
- adapter->vf_data[vf].tx_rate = (u16)tx_rate;
- igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
-
- return 0;
-}
-
-static int igb_ndo_get_vf_config(struct net_device *netdev,
- int vf, struct ifla_vf_info *ivi)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- if (vf >= adapter->vfs_allocated_count)
- return -EINVAL;
- ivi->vf = vf;
- memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
-#ifdef HAVE_VF_MIN_MAX_TXRATE
- ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
- ivi->min_tx_rate = 0;
-#else /* HAVE_VF_MIN_MAX_TXRATE */
- ivi->tx_rate = adapter->vf_data[vf].tx_rate;
-#endif /* HAVE_VF_MIN_MAX_TXRATE */
- ivi->vlan = adapter->vf_data[vf].pf_vlan;
- ivi->qos = adapter->vf_data[vf].pf_qos;
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
- ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
-#endif
- return 0;
-}
-#endif
-static void igb_vmm_control(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- int count;
- u32 reg;
-
- switch (hw->mac.type) {
- case e1000_82575:
- default:
- /* replication is not supported for 82575 */
- return;
- case e1000_82576:
- /* notify HW that the MAC is adding vlan tags */
- reg = E1000_READ_REG(hw, E1000_DTXCTL);
- reg |= (E1000_DTXCTL_VLAN_ADDED |
- E1000_DTXCTL_SPOOF_INT);
- E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
- case e1000_82580:
- /* enable replication vlan tag stripping */
- reg = E1000_READ_REG(hw, E1000_RPLOLR);
- reg |= E1000_RPLOLR_STRVLAN;
- E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
- case e1000_i350:
- case e1000_i354:
- /* none of the above registers are supported by i350 */
- break;
- }
-
- /* Enable Malicious Driver Detection */
- if ((adapter->vfs_allocated_count) &&
- (adapter->mdd)) {
- if (hw->mac.type == e1000_i350)
- igb_enable_mdd(adapter);
- }
-
- /* enable replication and loopback support */
- count = adapter->vfs_allocated_count || adapter->vmdq_pools;
- if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count)
- e1000_vmdq_set_loopback_pf(hw, 1);
- e1000_vmdq_set_anti_spoofing_pf(hw,
- adapter->vfs_allocated_count || adapter->vmdq_pools,
- adapter->vfs_allocated_count);
- e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
- adapter->vmdq_pools);
-}
-
-static void igb_init_fw(struct igb_adapter *adapter)
-{
- struct e1000_fw_drv_info fw_cmd;
- struct e1000_hw *hw = &adapter->hw;
- int i;
- u16 mask;
-
- if (hw->mac.type == e1000_i210)
- mask = E1000_SWFW_EEP_SM;
- else
- mask = E1000_SWFW_PHY0_SM;
- /* i211 parts do not support this feature */
- if (hw->mac.type == e1000_i211)
- hw->mac.arc_subsystem_valid = false;
-
- if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
- for (i = 0; i <= FW_MAX_RETRIES; i++) {
- E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
- fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
- fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
- fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
- fw_cmd.port_num = hw->bus.func;
- fw_cmd.drv_version = FW_FAMILY_DRV_VER;
- fw_cmd.hdr.checksum = 0;
- fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
- (FW_HDR_LEN +
- fw_cmd.hdr.buf_len));
- e1000_host_interface_command(hw, (u8*)&fw_cmd,
- sizeof(fw_cmd));
- if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
- break;
- }
- } else
- dev_warn(pci_dev_to_dev(adapter->pdev),
- "Unable to get semaphore, firmware init failed.\n");
- hw->mac.ops.release_swfw_sync(hw, mask);
-}
-
-static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 dmac_thr;
- u16 hwm;
- u32 status;
-
- if (hw->mac.type == e1000_i211)
- return;
-
- if (hw->mac.type > e1000_82580) {
- if (adapter->dmac != IGB_DMAC_DISABLE) {
- u32 reg;
-
- /* force threshold to 0. */
- E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
-
- /*
- * DMA Coalescing high water mark needs to be greater
- * than the Rx threshold. Set hwm to PBA - max frame
- * size in 16B units, but never below PBA - 6KB.
- */
- hwm = 64 * pba - adapter->max_frame_size / 16;
- if (hwm < 64 * (pba - 6))
- hwm = 64 * (pba - 6);
- reg = E1000_READ_REG(hw, E1000_FCRTC);
- reg &= ~E1000_FCRTC_RTH_COAL_MASK;
- reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
- & E1000_FCRTC_RTH_COAL_MASK);
- E1000_WRITE_REG(hw, E1000_FCRTC, reg);
-
- /*
- * Set the DMA Coalescing Rx threshold to PBA - 2 * max
- * frame size, but never below PBA - 10KB.
- */
- dmac_thr = pba - adapter->max_frame_size / 512;
- if (dmac_thr < pba - 10)
- dmac_thr = pba - 10;
- reg = E1000_READ_REG(hw, E1000_DMACR);
- reg &= ~E1000_DMACR_DMACTHR_MASK;
- reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
- & E1000_DMACR_DMACTHR_MASK);
-
- /* transition to L0x or L1 if available..*/
- reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
-
- /* Check for a 2.5Gb backplane connection before
- * configuring the watchdog timer: the msec value is
- * programmed in 12.8 usec intervals on 2.5Gb links
- * and in 32 usec intervals otherwise.
- */
- if (hw->mac.type == e1000_i354) {
- status = E1000_READ_REG(hw, E1000_STATUS);
- if ((status & E1000_STATUS_2P5_SKU) &&
- (!(status & E1000_STATUS_2P5_SKU_OVER)))
- reg |= ((adapter->dmac * 5) >> 6);
- else
- reg |= ((adapter->dmac) >> 5);
- } else {
- reg |= ((adapter->dmac) >> 5);
- }
-
- /*
- * Disable BMC-to-OS Watchdog enable
- * on devices that support OS-to-BMC
- */
- if (hw->mac.type != e1000_i354)
- reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
- E1000_WRITE_REG(hw, E1000_DMACR, reg);
-
- /* no lower threshold to disable coalescing (smart FIFO) - UTRESH=0 */
- E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
-
- /* This sets the time to wait before requesting
- * transition to low power state to number of usecs
- * needed to receive 1 512 byte frame at gigabit
- * line rate. On i350 device, time to make transition
- * to Lx state is delayed by 4 usec with flush disable
- * bit set to avoid losing mailbox interrupts
- */
- reg = E1000_READ_REG(hw, E1000_DMCTLX);
- if (hw->mac.type == e1000_i350)
- reg |= IGB_DMCTLX_DCFLUSH_DIS;
-
- /* in a 2.5Gb connection the TTLX unit is 0.4 usec,
- * so a 4 usec delay is 4 / 0.4 = 0xA units; the delay
- * itself is still 4 usec
- */
- if (hw->mac.type == e1000_i354) {
- status = E1000_READ_REG(hw, E1000_STATUS);
- if ((status & E1000_STATUS_2P5_SKU) &&
- (!(status & E1000_STATUS_2P5_SKU_OVER)))
- reg |= 0xA;
- else
- reg |= 0x4;
- } else {
- reg |= 0x4;
- }
- E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
-
- /* free space in tx packet buffer to wake from DMA coal */
- E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
- (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
-
- /* make low power state decision controlled by DMA coal */
- reg = E1000_READ_REG(hw, E1000_PCIEMISC);
- reg &= ~E1000_PCIEMISC_LX_DECISION;
- E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
- } /* endif adapter->dmac is not disabled */
- } else if (hw->mac.type == e1000_82580) {
- u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
- E1000_WRITE_REG(hw, E1000_PCIEMISC,
- reg & ~E1000_PCIEMISC_LX_DECISION);
- E1000_WRITE_REG(hw, E1000_DMACR, 0);
- }
-}
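/* Illustrative sketch, not from the original driver: the high water mark
 * and Rx threshold computed in igb_init_dmac() above use different units,
 * hwm in 16-byte blocks (64 per KB of packet buffer) and dmac_thr in KB.
 * The PBA size and frame size below are example values, not hardware data.
 */
#include <stdio.h>

int main(void)
{
	unsigned int pba = 34;          /* packet buffer size in KB (example value) */
	unsigned int max_frame = 1522;  /* max frame size in bytes (example value) */
	unsigned int hwm, dmac_thr;

	hwm = 64 * pba - max_frame / 16;        /* 2176 - 95 = 2081 (16B units) */
	if (hwm < 64 * (pba - 6))               /* never below PBA - 6KB */
		hwm = 64 * (pba - 6);

	dmac_thr = pba - max_frame / 512;       /* 34 - 2 = 32 KB */
	if (dmac_thr < pba - 10)                /* never below PBA - 10KB */
		dmac_thr = pba - 10;

	printf("hwm=%u (16-byte units), dmac_thr=%u KB\n", hwm, dmac_thr);
	return 0;
}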
-
-#ifdef HAVE_I2C_SUPPORT
-/* igb_read_i2c_byte - Reads 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @dev_addr: device address
- * @data: value read
- *
- * Performs byte read operation over I2C interface at
- * a specified device address.
- */
-s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data)
-{
- struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
- struct i2c_client *this_client = adapter->i2c_client;
- s32 status;
- u16 swfw_mask = 0;
-
- if (!this_client)
- return E1000_ERR_I2C;
-
- swfw_mask = E1000_SWFW_PHY0_SM;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
- != E1000_SUCCESS)
- return E1000_ERR_SWFW_SYNC;
-
- status = i2c_smbus_read_byte_data(this_client, byte_offset);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
- if (status < 0)
- return E1000_ERR_I2C;
- else {
- *data = status;
- return E1000_SUCCESS;
- }
-}
-
-/* igb_write_i2c_byte - Writes 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @dev_addr: device address
- * @data: value to write
- *
- * Performs byte write operation over I2C interface at
- * a specified device address.
- */
-s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data)
-{
- struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
- struct i2c_client *this_client = adapter->i2c_client;
- s32 status;
- u16 swfw_mask = E1000_SWFW_PHY0_SM;
-
- if (!this_client)
- return E1000_ERR_I2C;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS)
- return E1000_ERR_SWFW_SYNC;
- status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
- if (status)
- return E1000_ERR_I2C;
- else
- return E1000_SUCCESS;
-}
-#endif /* HAVE_I2C_SUPPORT */
-/* igb_main.c */
-
-
-/**
- * igb_kni_probe - Device Initialization Routine
- * @pdev: PCI device information struct
- * @lad_dev: output pointer that receives the initialized net_device
- *
- * Returns 0 on success, negative on failure
- *
- * igb_kni_probe initializes an adapter identified by a pci_dev structure.
- * The OS initialization, configuring of the adapter private structure,
- * and a hardware reset occur.
- **/
-int igb_kni_probe(struct pci_dev *pdev,
- struct net_device **lad_dev)
-{
- struct net_device *netdev;
- struct igb_adapter *adapter;
- struct e1000_hw *hw;
- u16 eeprom_data = 0;
- u8 pba_str[E1000_PBANUM_LENGTH];
- s32 ret_val;
- static int global_quad_port_a; /* global quad port a indication */
- int i, err, pci_using_dac = 0;
- static int cards_found;
-
- err = pci_enable_device_mem(pdev);
- if (err)
- return err;
-
-#ifdef NO_KNI
- pci_using_dac = 0;
- err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
- if (!err) {
- err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
- if (!err)
- pci_using_dac = 1;
- } else {
- err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
- if (err) {
- err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
- if (err) {
- IGB_ERR("No usable DMA configuration, "
- "aborting\n");
- goto err_dma;
- }
- }
- }
-
-#ifndef HAVE_ASPM_QUIRKS
- /* 82575 requires that the pci-e link partner disable the L0s state */
- switch (pdev->device) {
- case E1000_DEV_ID_82575EB_COPPER:
- case E1000_DEV_ID_82575EB_FIBER_SERDES:
- case E1000_DEV_ID_82575GB_QUAD_COPPER:
- pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
- default:
- break;
- }
-
-#endif /* HAVE_ASPM_QUIRKS */
- err = pci_request_selected_regions(pdev,
- pci_select_bars(pdev,
- IORESOURCE_MEM),
- igb_driver_name);
- if (err)
- goto err_pci_reg;
-
- pci_enable_pcie_error_reporting(pdev);
-
- pci_set_master(pdev);
-
- err = -ENOMEM;
-#endif /* NO_KNI */
-#ifdef HAVE_TX_MQ
- netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
- IGB_MAX_TX_QUEUES);
-#else
- netdev = alloc_etherdev(sizeof(struct igb_adapter));
-#endif /* HAVE_TX_MQ */
- if (!netdev)
- goto err_alloc_etherdev;
-
- SET_MODULE_OWNER(netdev);
- SET_NETDEV_DEV(netdev, &pdev->dev);
-
- //pci_set_drvdata(pdev, netdev);
- adapter = netdev_priv(netdev);
- adapter->netdev = netdev;
- adapter->pdev = pdev;
- hw = &adapter->hw;
- hw->back = adapter;
- adapter->port_num = hw->bus.func;
- adapter->msg_enable = (1 << debug) - 1;
-
-#ifdef HAVE_PCI_ERS
- err = pci_save_state(pdev);
- if (err)
- goto err_ioremap;
-#endif
- err = -EIO;
- hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!hw->hw_addr)
- goto err_ioremap;
-
-#ifdef HAVE_NET_DEVICE_OPS
- netdev->netdev_ops = &igb_netdev_ops;
-#else /* HAVE_NET_DEVICE_OPS */
- netdev->open = &igb_open;
- netdev->stop = &igb_close;
- netdev->get_stats = &igb_get_stats;
-#ifdef HAVE_SET_RX_MODE
- netdev->set_rx_mode = &igb_set_rx_mode;
-#endif
- netdev->set_multicast_list = &igb_set_rx_mode;
- netdev->set_mac_address = &igb_set_mac;
- netdev->change_mtu = &igb_change_mtu;
- netdev->do_ioctl = &igb_ioctl;
-#ifdef HAVE_TX_TIMEOUT
- netdev->tx_timeout = &igb_tx_timeout;
-#endif
- netdev->vlan_rx_register = igb_vlan_mode;
- netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
- netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
- netdev->poll_controller = igb_netpoll;
-#endif
- netdev->hard_start_xmit = &igb_xmit_frame;
-#endif /* HAVE_NET_DEVICE_OPS */
- igb_set_ethtool_ops(netdev);
-#ifdef HAVE_TX_TIMEOUT
- netdev->watchdog_timeo = 5 * HZ;
-#endif
-
- strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
-
- adapter->bd_number = cards_found;
-
- /* setup the private structure */
- err = igb_sw_init(adapter);
- if (err)
- goto err_sw_init;
-
- e1000_get_bus_info(hw);
-
- hw->phy.autoneg_wait_to_complete = FALSE;
- hw->mac.adaptive_ifs = FALSE;
-
- /* Copper options */
- if (hw->phy.media_type == e1000_media_type_copper) {
- hw->phy.mdix = AUTO_ALL_MODES;
- hw->phy.disable_polarity_correction = FALSE;
- hw->phy.ms_type = e1000_ms_hw_default;
- }
-
- if (e1000_check_reset_block(hw))
- dev_info(pci_dev_to_dev(pdev),
- "PHY reset is blocked due to SOL/IDER session.\n");
-
- /*
- * features is initialized to 0 in allocation, it might have bits
- * set by igb_sw_init so we should use an or instead of an
- * assignment.
- */
- netdev->features |= NETIF_F_SG |
- NETIF_F_IP_CSUM |
-#ifdef NETIF_F_IPV6_CSUM
- NETIF_F_IPV6_CSUM |
-#endif
-#ifdef NETIF_F_TSO
- NETIF_F_TSO |
-#ifdef NETIF_F_TSO6
- NETIF_F_TSO6 |
-#endif
-#endif /* NETIF_F_TSO */
-#ifdef NETIF_F_RXHASH
- NETIF_F_RXHASH |
-#endif
- NETIF_F_RXCSUM |
-#ifdef NETIF_F_HW_VLAN_CTAG_RX
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_TX;
-#else
- NETIF_F_HW_VLAN_RX |
- NETIF_F_HW_VLAN_TX;
-#endif
-
- if (hw->mac.type >= e1000_82576)
- netdev->features |= NETIF_F_SCTP_CSUM;
-
-#ifdef HAVE_NDO_SET_FEATURES
- /* copy netdev features into list of user selectable features */
- netdev->hw_features |= netdev->features;
-#ifndef IGB_NO_LRO
-
- /* give us the option of enabling LRO later */
- netdev->hw_features |= NETIF_F_LRO;
-#endif
-#else
-#ifdef NETIF_F_GRO
-
- /* this is only needed on kernels prior to 2.6.39 */
- netdev->features |= NETIF_F_GRO;
-#endif
-#endif
-
- /* set this bit last since it cannot be part of hw_features */
-#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
- netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#else
- netdev->features |= NETIF_F_HW_VLAN_FILTER;
-#endif
-
-#ifdef HAVE_NETDEV_VLAN_FEATURES
- netdev->vlan_features |= NETIF_F_TSO |
- NETIF_F_TSO6 |
- NETIF_F_IP_CSUM |
- NETIF_F_IPV6_CSUM |
- NETIF_F_SG;
-
-#endif
- if (pci_using_dac)
- netdev->features |= NETIF_F_HIGHDMA;
-
-#ifdef NO_KNI
- adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
-#ifdef DEBUG
- if (adapter->dmac != IGB_DMAC_DISABLE)
- printk("%s: DMA Coalescing is enabled..\n", netdev->name);
-#endif
-
- /* before reading the NVM, reset the controller to put the device in a
- * known good starting state */
- e1000_reset_hw(hw);
-#endif /* NO_KNI */
-
- /* make sure the NVM is good */
- if (e1000_validate_nvm_checksum(hw) < 0) {
- dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
- " Valid\n");
- err = -EIO;
- goto err_eeprom;
- }
-
- /* copy the MAC address out of the NVM */
- if (e1000_read_mac_addr(hw))
- dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
- memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
-#ifdef ETHTOOL_GPERMADDR
- memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
-
- if (!is_valid_ether_addr(netdev->perm_addr)) {
-#else
- if (!is_valid_ether_addr(netdev->dev_addr)) {
-#endif
- dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
- err = -EIO;
- goto err_eeprom;
- }
-
- memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
- adapter->mac_table[0].queue = adapter->vfs_allocated_count;
- adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
- igb_rar_set(adapter, 0);
-
- /* get firmware version for ethtool -i */
- igb_set_fw_version(adapter);
-
- /* Check if Media Autosense is enabled */
- if (hw->mac.type == e1000_82580)
- igb_init_mas(adapter);
-
-#ifdef NO_KNI
-#ifdef HAVE_TIMER_SETUP
- timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0);
- timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0);
-#else
- setup_timer(&adapter->watchdog_timer, &igb_watchdog,
- (unsigned long) adapter);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
- (unsigned long) adapter);
- setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
- (unsigned long) adapter);
-#endif
-
- INIT_WORK(&adapter->reset_task, igb_reset_task);
- INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
- if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
- INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
-#endif
-
- /* Initialize link properties that are user-changeable */
- adapter->fc_autoneg = true;
- hw->mac.autoneg = true;
- hw->phy.autoneg_advertised = 0x2f;
-
- hw->fc.requested_mode = e1000_fc_default;
- hw->fc.current_mode = e1000_fc_default;
-
- e1000_validate_mdi_setting(hw);
-
- /* By default, support wake on port A */
- if (hw->bus.func == 0)
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
-
- /* Check the NVM for wake support for non-port A ports */
- if (hw->mac.type >= e1000_82580)
- hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
- NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
- &eeprom_data);
- else if (hw->bus.func == 1)
- e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
-
- if (eeprom_data & IGB_EEPROM_APME)
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
-
- /* now that we have the eeprom settings, apply the special cases where
- * the eeprom may be wrong or the board simply won't support wake on
- * lan on a particular port */
- switch (pdev->device) {
- case E1000_DEV_ID_82575GB_QUAD_COPPER:
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- case E1000_DEV_ID_82575EB_FIBER_SERDES:
- case E1000_DEV_ID_82576_FIBER:
- case E1000_DEV_ID_82576_SERDES:
- /* Wake events only supported on port A for dual fiber
- * regardless of eeprom setting */
- if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- case E1000_DEV_ID_82576_QUAD_COPPER:
- case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
- /* if quad port adapter, disable WoL on all but port A */
- if (global_quad_port_a != 0)
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- else
- adapter->flags |= IGB_FLAG_QUAD_PORT_A;
- /* Reset for multiple quad port adapters */
- if (++global_quad_port_a == 4)
- global_quad_port_a = 0;
- break;
- default:
- /* If the device can't wake, don't set software support */
- if (!device_can_wakeup(&adapter->pdev->dev))
- adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
- break;
- }
-
- /* initialize the wol settings based on the eeprom settings */
- if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
- adapter->wol |= E1000_WUFC_MAG;
-
- /* Some vendors want WoL disabled by default, but still supported */
- if ((hw->mac.type == e1000_i350) &&
- (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
- adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
- adapter->wol = 0;
- }
-
-#ifdef NO_KNI
- device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
- adapter->flags & IGB_FLAG_WOL_SUPPORTED);
-
- /* reset the hardware with the new settings */
- igb_reset(adapter);
- adapter->devrc = 0;
-
-#ifdef HAVE_I2C_SUPPORT
- /* Init the I2C interface */
- err = igb_init_i2c(adapter);
- if (err) {
- dev_err(&pdev->dev, "failed to init i2c interface\n");
- goto err_eeprom;
- }
-#endif /* HAVE_I2C_SUPPORT */
-
- /* let the f/w know that the h/w is now under the control of the
- * driver. */
- igb_get_hw_control(adapter);
-
- strncpy(netdev->name, "eth%d", IFNAMSIZ);
- err = register_netdev(netdev);
- if (err)
- goto err_register;
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- err = igb_init_vmdq_netdevs(adapter);
- if (err)
- goto err_register;
-#endif
- /* carrier off reporting is important to ethtool even BEFORE open */
- netif_carrier_off(netdev);
-
-#ifdef IGB_DCA
- if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
- adapter->flags |= IGB_FLAG_DCA_ENABLED;
- dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
- igb_setup_dca(adapter);
- }
-
-#endif
-#ifdef HAVE_PTP_1588_CLOCK
- /* do hw tstamp init after resetting */
- igb_ptp_init(adapter);
-#endif /* HAVE_PTP_1588_CLOCK */
-
-#endif /* NO_KNI */
- dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
- /* print bus type/speed/width info */
- dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
- netdev->name,
- ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
- (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
- (hw->mac.type == e1000_i354) ? "integrated" :
- "unknown"),
- ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
- (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
- (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
- (hw->mac.type == e1000_i354) ? "integrated" :
- "unknown"));
- dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
- for (i = 0; i < 6; i++)
- printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
-
- ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
- if (ret_val)
- strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
- dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
- pba_str);
-
-
- /* Initialize the thermal sensor on i350 devices. */
- if (hw->mac.type == e1000_i350) {
- if (hw->bus.func == 0) {
- u16 ets_word;
-
- /*
- * Read the NVM to determine if this i350 device
- * supports an external thermal sensor.
- */
- e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
- if (ets_word != 0x0000 && ets_word != 0xFFFF)
- adapter->ets = true;
- else
- adapter->ets = false;
- }
-#ifdef NO_KNI
-#ifdef IGB_HWMON
-
- igb_sysfs_init(adapter);
-#else
-#ifdef IGB_PROCFS
-
- igb_procfs_init(adapter);
-#endif /* IGB_PROCFS */
-#endif /* IGB_HWMON */
-#endif /* NO_KNI */
- } else {
- adapter->ets = false;
- }
-
- if (hw->phy.media_type == e1000_media_type_copper) {
- switch (hw->mac.type) {
- case e1000_i350:
- case e1000_i210:
- case e1000_i211:
- /* Enable EEE for internal copper PHY devices */
- err = e1000_set_eee_i350(hw);
- if ((!err) &&
- (adapter->flags & IGB_FLAG_EEE))
- adapter->eee_advert =
- MDIO_EEE_100TX | MDIO_EEE_1000T;
- break;
- case e1000_i354:
- if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
- (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
- err = e1000_set_eee_i354(hw);
- if ((!err) &&
- (adapter->flags & IGB_FLAG_EEE))
- adapter->eee_advert =
- MDIO_EEE_100TX | MDIO_EEE_1000T;
- }
- break;
- default:
- break;
- }
- }
-
- /* send driver version info to firmware */
- if (hw->mac.type >= e1000_i350)
- igb_init_fw(adapter);
-
-#ifndef IGB_NO_LRO
- if (netdev->features & NETIF_F_LRO)
- dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
- else
- dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
-#endif
- dev_info(pci_dev_to_dev(pdev),
- "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
- adapter->msix_entries ? "MSI-X" :
- (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
- adapter->num_rx_queues, adapter->num_tx_queues);
-
- cards_found++;
- *lad_dev = netdev;
-
- pm_runtime_put_noidle(&pdev->dev);
- return 0;
-
-//err_register:
-// igb_release_hw_control(adapter);
-#ifdef HAVE_I2C_SUPPORT
- memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
-#endif /* HAVE_I2C_SUPPORT */
-err_eeprom:
-// if (!e1000_check_reset_block(hw))
-// e1000_phy_hw_reset(hw);
-
- if (hw->flash_address)
- iounmap(hw->flash_address);
-err_sw_init:
-// igb_clear_interrupt_scheme(adapter);
-// igb_reset_sriov_capability(adapter);
- iounmap(hw->hw_addr);
-err_ioremap:
- free_netdev(netdev);
-err_alloc_etherdev:
-// pci_release_selected_regions(pdev,
-// pci_select_bars(pdev, IORESOURCE_MEM));
-//err_pci_reg:
-//err_dma:
- pci_disable_device(pdev);
- return err;
-}
-
-
-void igb_kni_remove(struct pci_dev *pdev)
-{
- pci_disable_device(pdev);
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
deleted file mode 100644
index 98209a10..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
+++ /dev/null
@@ -1,832 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-#include <linux/netdevice.h>
-
-#include "igb.h"
-
-/* This is the only thing that needs to be changed to adjust the
- * maximum number of ports that the driver can manage.
- */
-
-#define IGB_MAX_NIC 32
-
-#define OPTION_UNSET -1
-#define OPTION_DISABLED 0
-#define OPTION_ENABLED 1
-#define MAX_NUM_LIST_OPTS 15
-
-/* All parameters are treated the same, as an integer array of values.
- * This macro just reduces the need to repeat the same declaration code
- * over and over (plus this helps to avoid typo bugs).
- */
-
-#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET }
-#ifndef module_param_array
-/* Module Parameters are always initialized to -1, so that the driver
- * can tell the difference between no user specified value or the
- * user asking for the default value.
- * The true default values are loaded in when igb_check_options is called.
- *
- * This is a GCC extension to ANSI C.
- * See the item "Labeled Elements in Initializers" in the section
- * "Extensions to the C Language Family" of the GCC documentation.
- */
-
-#define IGB_PARAM(X, desc) \
- static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
- MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \
- MODULE_PARM_DESC(X, desc);
-#else
-#define IGB_PARAM(X, desc) \
- static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
- static unsigned int num_##X; \
- module_param_array_named(X, X, int, &num_##X, 0); \
- MODULE_PARM_DESC(X, desc);
-#endif
-
-/* Interrupt Throttle Rate (interrupts/sec)
- *
- * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
- */
-IGB_PARAM(InterruptThrottleRate,
- "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive");
-#define DEFAULT_ITR 3
-#define MAX_ITR 100000
-/* #define MIN_ITR 120 */
-#define MIN_ITR 0
-/* IntMode (Interrupt Mode)
- *
- * Valid Range: 0 - 2
- *
- * Default Value: 2 (MSI-X)
- */
-IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2");
-#define MAX_INTMODE IGB_INT_MODE_MSIX
-#define MIN_INTMODE IGB_INT_MODE_LEGACY
-
-IGB_PARAM(Node, "set the starting node to allocate memory on, default -1");
-
-/* LLIPort (Low Latency Interrupt TCP Port)
- *
- * Valid Range: 0 - 65535
- *
- * Default Value: 0 (disabled)
- */
-IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off");
-
-#define DEFAULT_LLIPORT 0
-#define MAX_LLIPORT 0xFFFF
-#define MIN_LLIPORT 0
-
-/* LLIPush (Low Latency Interrupt on TCP Push flag)
- *
- * Valid Range: 0, 1
- *
- * Default Value: 0 (disabled)
- */
-IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off");
-
-#define DEFAULT_LLIPUSH 0
-#define MAX_LLIPUSH 1
-#define MIN_LLIPUSH 0
-
-/* LLISize (Low Latency Interrupt on Packet Size)
- *
- * Valid Range: 0 - 1500
- *
- * Default Value: 0 (disabled)
- */
-IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off");
-
-#define DEFAULT_LLISIZE 0
-#define MAX_LLISIZE 1500
-#define MIN_LLISIZE 0
-
-/* RSS (Enable RSS multiqueue receive)
- *
- * Valid Range: 0 - 8
- *
- * Default Value: 1
- */
-IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus");
-
-#define DEFAULT_RSS 1
-#define MAX_RSS 8
-#define MIN_RSS 0
-
-/* VMDQ (Enable VMDq multiqueue receive)
- *
- * Valid Range: 0 - 8
- *
- * Default Value: 0
- */
-IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0");
-
-#define DEFAULT_VMDQ 0
-#define MAX_VMDQ MAX_RSS
-#define MIN_VMDQ 0
-
-/* max_vfs (Enable SR-IOV VF devices)
- *
- * Valid Range: 0 - 7
- *
- * Default Value: 0
- */
-IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0");
-
-#define DEFAULT_SRIOV 0
-#define MAX_SRIOV 7
-#define MIN_SRIOV 0
-
-/* MDD (Enable Malicious Driver Detection)
- *
- * Only available when SR-IOV is enabled - max_vfs is greater than 0
- *
- * Valid Range: 0, 1
- *
- * Default Value: 1
- */
-IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. "
- "Only available when max_vfs is greater than 0");
-
-#ifdef DEBUG
-
-/* Disable Hardware Reset on Tx Hang
- *
- * Valid Range: 0, 1
- *
- * Default Value: 0 (disabled, i.e. h/w will reset)
- */
-IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang");
-
-/* Dump Transmit and Receive buffers
- *
- * Valid Range: 0, 1
- *
- * Default Value: 0
- */
-IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request");
-
-#endif /* DEBUG */
-
-/* QueuePairs (Enable TX/RX queue pairs for interrupt handling)
- *
- * Valid Range: 0 - 1
- *
- * Default Value: 1
- */
-IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on");
-
-#define DEFAULT_QUEUE_PAIRS 1
-#define MAX_QUEUE_PAIRS 1
-#define MIN_QUEUE_PAIRS 0
-
-/* Enable/disable EEE (a.k.a. IEEE802.3az)
- *
- * Valid Range: 0, 1
- *
- * Default Value: 1
- */
- IGB_PARAM(EEE, "Enable/disable EEE (IEEE 802.3az) on parts that support the feature");
-
-/* Enable/disable DMA Coalescing
- *
- * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
- * 9000, 10000(msec), 250(usec), 500(usec)
- *
- * Default Value: 0
- */
- IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing (0=off, 1000-10000 (msec), 250, 500 (usec))");
-
-#ifndef IGB_NO_LRO
-/* Enable/disable Large Receive Offload
- *
- * Valid Values: 0(off), 1(on)
- *
- * Default Value: 0
- */
- IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off");
-
-#endif
-struct igb_opt_list {
- int i;
- char *str;
-};
-struct igb_option {
- enum { enable_option, range_option, list_option } type;
- const char *name;
- const char *err;
- int def;
- union {
- struct { /* range_option info */
- int min;
- int max;
- } r;
- struct { /* list_option info */
- int nr;
- struct igb_opt_list *p;
- } l;
- } arg;
-};
-
-static int igb_validate_option(unsigned int *value,
- struct igb_option *opt,
- struct igb_adapter *adapter)
-{
- if (*value == OPTION_UNSET) {
- *value = opt->def;
- return 0;
- }
-
- switch (opt->type) {
- case enable_option:
- switch (*value) {
- case OPTION_ENABLED:
- DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name);
- return 0;
- case OPTION_DISABLED:
- DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name);
- return 0;
- }
- break;
- case range_option:
- if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
- DPRINTK(PROBE, INFO,
- "%s set to %d\n", opt->name, *value);
- return 0;
- }
- break;
- case list_option: {
- int i;
- struct igb_opt_list *ent;
-
- for (i = 0; i < opt->arg.l.nr; i++) {
- ent = &opt->arg.l.p[i];
- if (*value == ent->i) {
- if (ent->str[0] != '\0')
- DPRINTK(PROBE, INFO, "%s\n", ent->str);
- return 0;
- }
- }
- }
- break;
- default:
- BUG();
- }
-
- DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n",
- opt->name, *value, opt->err);
- *value = opt->def;
- return -1;
-}
-
-/**
- * igb_check_options - Range Checking for Command Line Parameters
- * @adapter: board private structure
- *
- * This routine checks all command line parameters for valid user
- * input. If an invalid value is given, or if no user specified
- * value exists, a default value is used. The final value is stored
- * in a variable in the adapter structure.
- **/
-
-void igb_check_options(struct igb_adapter *adapter)
-{
- int bd = adapter->bd_number;
- struct e1000_hw *hw = &adapter->hw;
-
- if (bd >= IGB_MAX_NIC) {
- DPRINTK(PROBE, NOTICE,
- "Warning: no configuration for board #%d\n", bd);
- DPRINTK(PROBE, NOTICE, "Using defaults for all values\n");
-#ifndef module_param_array
- bd = IGB_MAX_NIC;
-#endif
- }
-
- { /* Interrupt Throttling Rate */
- struct igb_option opt = {
- .type = range_option,
- .name = "Interrupt Throttling Rate (ints/sec)",
- .err = "using default of " __MODULE_STRING(DEFAULT_ITR),
- .def = DEFAULT_ITR,
- .arg = { .r = { .min = MIN_ITR,
- .max = MAX_ITR } }
- };
-
-#ifdef module_param_array
- if (num_InterruptThrottleRate > bd) {
-#endif
- unsigned int itr = InterruptThrottleRate[bd];
-
- switch (itr) {
- case 0:
- DPRINTK(PROBE, INFO, "%s turned off\n",
- opt.name);
- if (hw->mac.type >= e1000_i350)
- adapter->dmac = IGB_DMAC_DISABLE;
- adapter->rx_itr_setting = itr;
- break;
- case 1:
- DPRINTK(PROBE, INFO, "%s set to dynamic mode\n",
- opt.name);
- adapter->rx_itr_setting = itr;
- break;
- case 3:
- DPRINTK(PROBE, INFO,
- "%s set to dynamic conservative mode\n",
- opt.name);
- adapter->rx_itr_setting = itr;
- break;
- default:
- igb_validate_option(&itr, &opt, adapter);
- /* Save the setting, because the dynamic bits
- * change itr. An invalid user value falls back to
- * conservative mode; otherwise clear the lower two
- * bits, which are used as control flags. */
- if (itr == 3) {
- adapter->rx_itr_setting = itr;
- } else {
- adapter->rx_itr_setting = 1000000000 /
- (itr * 256);
- adapter->rx_itr_setting &= ~3;
- }
- break;
- }
-#ifdef module_param_array
- } else {
- adapter->rx_itr_setting = opt.def;
- }
-#endif
- adapter->tx_itr_setting = adapter->rx_itr_setting;
- }
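/* Standalone sketch (not from the original driver) of the conversion done
 * above for ITR values other than 0/1/3: the stored setting is an interval,
 * 1000000000 / (itr * 256), i.e. the requested period counted in 256 ns
 * units, with the two low control bits cleared.  The 8000 ints/sec request
 * below is just an example input.
 */
#include <stdio.h>

int main(void)
{
	unsigned int itr = 8000;                          /* requested ints/sec */
	unsigned int setting = 1000000000 / (itr * 256);  /* = 488 */

	setting &= ~3;                                    /* low bits are control flags */
	printf("itr=%u -> setting=%u (~%u usec between interrupts)\n",
	       itr, setting, setting * 256 / 1000);
	return 0;
}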
- { /* Interrupt Mode */
- struct igb_option opt = {
- .type = range_option,
- .name = "Interrupt Mode",
- .err = "defaulting to 2 (MSI-X)",
- .def = IGB_INT_MODE_MSIX,
- .arg = { .r = { .min = MIN_INTMODE,
- .max = MAX_INTMODE } }
- };
-
-#ifdef module_param_array
- if (num_IntMode > bd) {
-#endif
- unsigned int int_mode = IntMode[bd];
- igb_validate_option(&int_mode, &opt, adapter);
- adapter->int_mode = int_mode;
-#ifdef module_param_array
- } else {
- adapter->int_mode = opt.def;
- }
-#endif
- }
- { /* Low Latency Interrupt TCP Port */
- struct igb_option opt = {
- .type = range_option,
- .name = "Low Latency Interrupt TCP Port",
- .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT),
- .def = DEFAULT_LLIPORT,
- .arg = { .r = { .min = MIN_LLIPORT,
- .max = MAX_LLIPORT } }
- };
-
-#ifdef module_param_array
- if (num_LLIPort > bd) {
-#endif
- adapter->lli_port = LLIPort[bd];
- if (adapter->lli_port) {
- igb_validate_option(&adapter->lli_port, &opt,
- adapter);
- } else {
- DPRINTK(PROBE, INFO, "%s turned off\n",
- opt.name);
- }
-#ifdef module_param_array
- } else {
- adapter->lli_port = opt.def;
- }
-#endif
- }
- { /* Low Latency Interrupt on Packet Size */
- struct igb_option opt = {
- .type = range_option,
- .name = "Low Latency Interrupt on Packet Size",
- .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE),
- .def = DEFAULT_LLISIZE,
- .arg = { .r = { .min = MIN_LLISIZE,
- .max = MAX_LLISIZE } }
- };
-
-#ifdef module_param_array
- if (num_LLISize > bd) {
-#endif
- adapter->lli_size = LLISize[bd];
- if (adapter->lli_size) {
- igb_validate_option(&adapter->lli_size, &opt,
- adapter);
- } else {
- DPRINTK(PROBE, INFO, "%s turned off\n",
- opt.name);
- }
-#ifdef module_param_array
- } else {
- adapter->lli_size = opt.def;
- }
-#endif
- }
- { /* Low Latency Interrupt on TCP Push flag */
- struct igb_option opt = {
- .type = enable_option,
- .name = "Low Latency Interrupt on TCP Push flag",
- .err = "defaulting to Disabled",
- .def = OPTION_DISABLED
- };
-
-#ifdef module_param_array
- if (num_LLIPush > bd) {
-#endif
- unsigned int lli_push = LLIPush[bd];
- igb_validate_option(&lli_push, &opt, adapter);
- adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0;
-#ifdef module_param_array
- } else {
- adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0;
- }
-#endif
- }
- { /* SRIOV - Enable SR-IOV VF devices */
- struct igb_option opt = {
- .type = range_option,
- .name = "max_vfs - SR-IOV VF devices",
- .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV),
- .def = DEFAULT_SRIOV,
- .arg = { .r = { .min = MIN_SRIOV,
- .max = MAX_SRIOV } }
- };
-
-#ifdef module_param_array
- if (num_max_vfs > bd) {
-#endif
- adapter->vfs_allocated_count = max_vfs[bd];
- igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter);
-
-#ifdef module_param_array
- } else {
- adapter->vfs_allocated_count = opt.def;
- }
-#endif
- if (adapter->vfs_allocated_count) {
- switch (hw->mac.type) {
- case e1000_82575:
- case e1000_82580:
- case e1000_i210:
- case e1000_i211:
- case e1000_i354:
- adapter->vfs_allocated_count = 0;
- DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n");
- default:
- break;
- }
- }
- }
- { /* VMDQ - Enable VMDq multiqueue receive */
- struct igb_option opt = {
- .type = range_option,
- .name = "VMDQ - VMDq multiqueue queue count",
- .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ),
- .def = DEFAULT_VMDQ,
- .arg = { .r = { .min = MIN_VMDQ,
- .max = (MAX_VMDQ - adapter->vfs_allocated_count) } }
- };
- if ((hw->mac.type != e1000_i210) &&
- (hw->mac.type != e1000_i211)) {
-#ifdef module_param_array
- if (num_VMDQ > bd) {
-#endif
- adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]);
- if (adapter->vfs_allocated_count && !adapter->vmdq_pools) {
- DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n");
- adapter->vmdq_pools = 1;
- }
- igb_validate_option(&adapter->vmdq_pools, &opt, adapter);
-
-#ifdef module_param_array
- } else {
- if (!adapter->vfs_allocated_count)
- adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def);
- else
- adapter->vmdq_pools = 1;
- }
-#endif
-#ifdef CONFIG_IGB_VMDQ_NETDEV
- if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) {
- DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n");
- adapter->vmdq_pools = 0;
- }
-#endif
-
- } else {
- DPRINTK(PROBE, INFO, "VMDq option is not supported.\n");
- adapter->vmdq_pools = opt.def;
- }
- }
- { /* RSS - Enable RSS multiqueue receives */
- struct igb_option opt = {
- .type = range_option,
- .name = "RSS - RSS multiqueue receive count",
- .err = "using default of " __MODULE_STRING(DEFAULT_RSS),
- .def = DEFAULT_RSS,
- .arg = { .r = { .min = MIN_RSS,
- .max = MAX_RSS } }
- };
-
- switch (hw->mac.type) {
- case e1000_82575:
-#ifndef CONFIG_IGB_VMDQ_NETDEV
- if (!!adapter->vmdq_pools) {
- if (adapter->vmdq_pools <= 2) {
- if (adapter->vmdq_pools == 2)
- opt.arg.r.max = 3;
- } else {
- opt.arg.r.max = 1;
- }
- } else {
- opt.arg.r.max = 4;
- }
-#else
- opt.arg.r.max = !!adapter->vmdq_pools ? 1 : 4;
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
- break;
- case e1000_i210:
- opt.arg.r.max = 4;
- break;
- case e1000_i211:
- opt.arg.r.max = 2;
- break;
- case e1000_82576:
-#ifndef CONFIG_IGB_VMDQ_NETDEV
- if (!!adapter->vmdq_pools)
- opt.arg.r.max = 2;
- break;
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- default:
- if (!!adapter->vmdq_pools)
- opt.arg.r.max = 1;
- break;
- }
-
- if (adapter->int_mode != IGB_INT_MODE_MSIX) {
- DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n",
- opt.err);
- opt.arg.r.max = 1;
- }
-
-#ifdef module_param_array
- if (num_RSS > bd) {
-#endif
- adapter->rss_queues = RSS[bd];
- switch (adapter->rss_queues) {
- case 1:
- break;
- default:
- igb_validate_option(&adapter->rss_queues, &opt, adapter);
- if (adapter->rss_queues)
- break;
- case 0:
- adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus());
- break;
- }
-#ifdef module_param_array
- } else {
- adapter->rss_queues = opt.def;
- }
-#endif
- }
- { /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */
- struct igb_option opt = {
- .type = enable_option,
- .name = "QueuePairs - Tx/Rx queue pairs for interrupt handling",
- .err = "defaulting to Enabled",
- .def = OPTION_ENABLED
- };
-#ifdef module_param_array
- if (num_QueuePairs > bd) {
-#endif
- unsigned int qp = QueuePairs[bd];
- /*
- * We must enable queue pairs if the number of queues
- * exceeds the number of available interrupts. We are
- * limited to 10, or 3 per unallocated vf. On I210 and
- * I211 devices, we are limited to 5 interrupts.
- * However, since I211 only supports 2 queues, we do not
- * need to check and override the user option.
- */
- if (qp == OPTION_DISABLED) {
- if (adapter->rss_queues > 4)
- qp = OPTION_ENABLED;
-
- if (adapter->vmdq_pools > 4)
- qp = OPTION_ENABLED;
-
- if (adapter->rss_queues > 1 &&
- (adapter->vmdq_pools > 3 ||
- adapter->vfs_allocated_count > 6))
- qp = OPTION_ENABLED;
-
- if (hw->mac.type == e1000_i210 &&
- adapter->rss_queues > 2)
- qp = OPTION_ENABLED;
-
- if (qp == OPTION_ENABLED)
- DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n",
- opt.err);
- }
- igb_validate_option(&qp, &opt, adapter);
- adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0;
-#ifdef module_param_array
- } else {
- adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0;
- }
-#endif
- }
- { /* EEE - Enable EEE for capable adapters */
-
- if (hw->mac.type >= e1000_i350) {
- struct igb_option opt = {
- .type = enable_option,
- .name = "EEE Support",
- .err = "defaulting to Enabled",
- .def = OPTION_ENABLED
- };
-#ifdef module_param_array
- if (num_EEE > bd) {
-#endif
- unsigned int eee = EEE[bd];
- igb_validate_option(&eee, &opt, adapter);
- adapter->flags |= eee ? IGB_FLAG_EEE : 0;
- if (eee)
- hw->dev_spec._82575.eee_disable = false;
- else
- hw->dev_spec._82575.eee_disable = true;
-
-#ifdef module_param_array
- } else {
- adapter->flags |= opt.def ? IGB_FLAG_EEE : 0;
- if (adapter->flags & IGB_FLAG_EEE)
- hw->dev_spec._82575.eee_disable = false;
- else
- hw->dev_spec._82575.eee_disable = true;
- }
-#endif
- }
- }
- { /* DMAC - Enable DMA Coalescing for capable adapters */
-
- if (hw->mac.type >= e1000_i350) {
- struct igb_opt_list list[] = {
- { IGB_DMAC_DISABLE, "DMAC Disable"},
- { IGB_DMAC_MIN, "DMAC 250 usec"},
- { IGB_DMAC_500, "DMAC 500 usec"},
- { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"},
- { IGB_DMAC_2000, "DMAC 2000 usec"},
- { IGB_DMAC_3000, "DMAC 3000 usec"},
- { IGB_DMAC_4000, "DMAC 4000 usec"},
- { IGB_DMAC_5000, "DMAC 5000 usec"},
- { IGB_DMAC_6000, "DMAC 6000 usec"},
- { IGB_DMAC_7000, "DMAC 7000 usec"},
- { IGB_DMAC_8000, "DMAC 8000 usec"},
- { IGB_DMAC_9000, "DMAC 9000 usec"},
- { IGB_DMAC_MAX, "DMAC 10000 usec"}
- };
- struct igb_option opt = {
- .type = list_option,
- .name = "DMA Coalescing",
- .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE),
- .def = IGB_DMAC_DISABLE,
- .arg = { .l = { .nr = 13,
- .p = list
- }
- }
- };
-#ifdef module_param_array
- if (num_DMAC > bd) {
-#endif
- unsigned int dmac = DMAC[bd];
- if (adapter->rx_itr_setting == IGB_DMAC_DISABLE)
- dmac = IGB_DMAC_DISABLE;
- igb_validate_option(&dmac, &opt, adapter);
- switch (dmac) {
- case IGB_DMAC_DISABLE:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_MIN:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_500:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_EN_DEFAULT:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_2000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_3000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_4000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_5000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_6000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_7000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_8000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_9000:
- adapter->dmac = dmac;
- break;
- case IGB_DMAC_MAX:
- adapter->dmac = dmac;
- break;
- default:
- adapter->dmac = opt.def;
- DPRINTK(PROBE, INFO,
- "Invalid DMAC setting, "
- "resetting DMAC to %d\n", opt.def);
- }
-#ifdef module_param_array
- } else
- adapter->dmac = opt.def;
-#endif
- }
- }
-#ifndef IGB_NO_LRO
- { /* LRO - Enable Large Receive Offload */
- struct igb_option opt = {
- .type = enable_option,
- .name = "LRO - Large Receive Offload",
- .err = "defaulting to Disabled",
- .def = OPTION_DISABLED
- };
- struct net_device *netdev = adapter->netdev;
-#ifdef module_param_array
- if (num_LRO > bd) {
-#endif
- unsigned int lro = LRO[bd];
- igb_validate_option(&lro, &opt, adapter);
- netdev->features |= lro ? NETIF_F_LRO : 0;
-#ifdef module_param_array
- } else if (opt.def == OPTION_ENABLED) {
- netdev->features |= NETIF_F_LRO;
- }
-#endif
- }
-#endif /* IGB_NO_LRO */
- { /* MDD - Enable Malicious Driver Detection. Only available when
- SR-IOV is enabled. */
- struct igb_option opt = {
- .type = enable_option,
- .name = "Malicious Driver Detection",
- .err = "defaulting to 1",
- .def = OPTION_ENABLED,
- .arg = { .r = { .min = OPTION_DISABLED,
- .max = OPTION_ENABLED } }
- };
-
-#ifdef module_param_array
- if (num_MDD > bd) {
-#endif
- adapter->mdd = MDD[bd];
- igb_validate_option((uint *)&adapter->mdd, &opt,
- adapter);
-#ifdef module_param_array
- } else {
- adapter->mdd = opt.def;
- }
-#endif
- }
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
deleted file mode 100644
index ec2b86a0..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/* ethtool register test data */
-struct igb_reg_test {
- u16 reg;
- u16 reg_offset;
- u16 array_len;
- u16 test_type;
- u32 mask;
- u32 write;
-};
-
-/* In the hardware, registers are laid out either singly, in arrays
- * spaced 0x100 bytes apart, or in contiguous tables. We assume
- * most tests take place on arrays or single registers (handled
- * as a single-element array) and special-case the tables.
- * Table tests are always pattern tests.
- *
- * We also make provision for some required setup steps by specifying
- * registers to be written without any read-back testing.
- */
-
-#define PATTERN_TEST 1
-#define SET_READ_TEST 2
-#define WRITE_NO_TEST 3
-#define TABLE32_TEST 4
-#define TABLE64_TEST_LO 5
-#define TABLE64_TEST_HI 6
-
-/* i210 reg test */
-static struct igb_reg_test reg_test_i210[] = {
- { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- /* RDH is read-only for i210, only test RDT. */
- { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 },
- { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
- { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
- { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RA, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_HI,
- 0x900FFFFF, 0xFFFFFFFF },
- { E1000_MTA, 0, 128, TABLE32_TEST,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
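/* Illustrative sketch, not from the original driver: one plausible way a
 * PATTERN_TEST row such as the FCAH entry above (mask 0x0000FFFF, write
 * 0xFFFFFFFF) is consumed by an ethtool-style self test -- write each test
 * pattern through the row's write mask, read back, and compare under the
 * read mask.  The pattern set and the register access are assumptions; a
 * plain variable stands in for MMIO here.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t fake_reg;                         /* stand-in for MMIO */
static void wr(uint32_t v) { fake_reg = v; }
static uint32_t rd(void)   { return fake_reg; }

int main(void)
{
	static const uint32_t patterns[] = {
		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
	};
	uint32_t mask = 0x0000FFFF, write = 0xFFFFFFFF;
	unsigned int i, failed = 0;

	for (i = 0; i < 4; i++) {
		wr(patterns[i] & write);
		if ((rd() & mask) != (patterns[i] & write & mask))
			failed++;
	}
	printf("%u pattern(s) failed\n", failed);
	return 0;
}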
-
-/* i350 reg test */
-static struct igb_reg_test reg_test_i350[] = {
- { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- /* VET is readonly on i350 */
- { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- /* RDH is read-only for i350, only test RDT. */
- { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
- { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
- { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
- { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RA, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_HI,
- 0xC3FFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 16, TABLE64_TEST_HI,
- 0xC3FFFFFF, 0xFFFFFFFF },
- { E1000_MTA, 0, 128, TABLE32_TEST,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
-
-/* 82580 reg test */
-static struct igb_reg_test reg_test_82580[] = {
- { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- /* RDH is read-only for 82580, only test RDT. */
- { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
- { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
- { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
- { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RA, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_HI,
- 0x83FFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 8, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 8, TABLE64_TEST_HI,
- 0x83FFFFFF, 0xFFFFFFFF },
- { E1000_MTA, 0, 128, TABLE32_TEST,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
-
-/* 82576 reg test */
-static struct igb_reg_test reg_test_82576[] = {
- { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_RDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- /* Enable all queues before testing. */
- { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
- { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
- /* RDH is read-only for 82576, only test RDT. */
- { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RDT(4), 0x40, 12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
- { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, 0 },
- { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
- { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
- { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_TDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
- { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RA, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_HI,
- 0x83FFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 8, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA2, 0, 8, TABLE64_TEST_HI,
- 0x83FFFFFF, 0xFFFFFFFF },
- { E1000_MTA, 0, 128, TABLE32_TEST,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
-
-/* 82575 register test */
-static struct igb_reg_test reg_test_82575[] = {
- { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
- { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- /* Enable all four RX queues before testing. */
- { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
- /* RDH is read-only for 82575, only test RDT. */
- { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
- { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
- { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
- { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB },
- { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF },
- { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
- { E1000_TXCW, 0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_LO,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { E1000_RA, 0, 16, TABLE64_TEST_HI,
- 0x800FFFFF, 0xFFFFFFFF },
- { E1000_MTA, 0, 128, TABLE32_TEST,
- 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
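Editor's note: the reg_test_* tables above feed the ethtool register self-test. Each entry names a register, the stride between instances of an array register, the number of instances, a test type, and a mask/write pair. The sketch below shows how a PATTERN_TEST walker could consume such a table; the field names and the reg_pattern_test() callback are assumptions for illustration and are not taken from the removed driver.

/* Illustrative sketch only -- not part of the deleted source. */
struct igb_reg_test {
	u32 reg;        /* base register offset              */
	u32 reg_offset; /* stride between array instances    */
	u32 array_len;  /* number of instances to exercise   */
	u32 test_type;  /* PATTERN_TEST, SET_READ_TEST, ...  */
	u32 mask;       /* bits expected to read back        */
	u32 write;      /* bits the test is allowed to write */
};

static bool igb_walk_reg_tests(const struct igb_reg_test *t,
			       bool (*reg_pattern_test)(u32 reg, u32 mask,
							u32 write))
{
	for (; t->reg; t++) {
		u32 i;

		if (t->test_type != PATTERN_TEST)
			continue;	/* other test types handled elsewhere */
		for (i = 0; i < t->array_len; i++)
			if (!reg_pattern_test(t->reg + i * t->reg_offset,
					      t->mask, t->write))
				return false;	/* register failed the pattern */
	}
	return true;
}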
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
deleted file mode 100644
index cdd807b9..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
+++ /dev/null
@@ -1,421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-#include <linux/tcp.h>
-
-#include "igb.h"
-#include "igb_vmdq.h"
-#include <linux/if_vlan.h>
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-int igb_vmdq_open(struct net_device *dev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct net_device *main_netdev = adapter->netdev;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- DPRINTK(DRV, WARNING,
- "Open %s before opening this device.\n",
- main_netdev->name);
- return -EAGAIN;
- }
- netif_carrier_off(dev);
- vadapter->tx_ring->vmdq_netdev = dev;
- vadapter->rx_ring->vmdq_netdev = dev;
- if (is_valid_ether_addr(dev->dev_addr)) {
- igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
- igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
- }
- netif_carrier_on(dev);
- return 0;
-}
-
-int igb_vmdq_close(struct net_device *dev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- netif_carrier_off(dev);
- igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
-
- vadapter->tx_ring->vmdq_netdev = NULL;
- vadapter->rx_ring->vmdq_netdev = NULL;
- return 0;
-}
-
-netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
-
- return igb_xmit_frame_ring(skb, vadapter->tx_ring);
-}
-
-struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct e1000_hw *hw = &adapter->hw;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- vadapter->net_stats.rx_packets +=
- E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue));
- E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0);
- vadapter->net_stats.tx_packets +=
- E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue));
- E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0);
- vadapter->net_stats.rx_bytes +=
- E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue));
- E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0);
- vadapter->net_stats.tx_bytes +=
- E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue));
- E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0);
- vadapter->net_stats.multicast +=
- E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue));
- E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0);
- /* only return the current stats */
- return &vadapter->net_stats;
-}
-
-/**
- * igb_write_vm_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- * 0 on no addresses written
- * X on writing X addresses to the RAR table
- **/
-static int igb_write_vm_addr_list(struct net_device *netdev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- int count = 0;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- /* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > igb_available_rars(adapter))
- return -ENOMEM;
-
- if (!netdev_uc_empty(netdev)) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- struct netdev_hw_addr *ha;
-#else
- struct dev_mc_list *ha;
-#endif
- netdev_for_each_uc_addr(ha, netdev) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- igb_del_mac_filter(adapter, ha->addr, hw_queue);
- igb_add_mac_filter(adapter, ha->addr, hw_queue);
-#else
- igb_del_mac_filter(adapter, ha->da_addr, hw_queue);
- igb_add_mac_filter(adapter, ha->da_addr, hw_queue);
-#endif
- count++;
- }
- }
- return count;
-}
-
-
-#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous mode */
-void igb_vmdq_set_rx_mode(struct net_device *dev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct e1000_hw *hw = &adapter->hw;
- u32 vmolr, rctl;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- /* Check for Promiscuous and All Multicast modes */
- vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue));
-
- /* clear the affected bits */
- vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME |
- E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE);
-
- if (dev->flags & IFF_PROMISC) {
- vmolr |= E1000_VMOLR_UPE;
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- rctl |= E1000_RCTL_UPE;
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
- } else {
- rctl = E1000_READ_REG(hw, E1000_RCTL);
- rctl &= ~E1000_RCTL_UPE;
- E1000_WRITE_REG(hw, E1000_RCTL, rctl);
- if (dev->flags & IFF_ALLMULTI) {
- vmolr |= E1000_VMOLR_MPME;
- } else {
- /*
-			 * Write addresses to the MTA; if the attempt fails,
-			 * then just turn on promiscuous mode so that we can
-			 * at least receive multicast traffic
- */
- if (igb_write_mc_addr_list(adapter->netdev) != 0)
- vmolr |= E1000_VMOLR_ROMPE;
- }
-#ifdef HAVE_SET_RX_MODE
- /*
- * Write addresses to available RAR registers, if there is not
- * sufficient space to store all the addresses then enable
- * unicast promiscuous mode
- */
- if (igb_write_vm_addr_list(dev) < 0)
- vmolr |= E1000_VMOLR_UPE;
-#endif
- }
- E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr);
-
- return;
-}
-
-int igb_vmdq_set_mac(struct net_device *dev, void *p)
-{
- struct sockaddr *addr = p;
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
- return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
-}
-
-int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
-
- if (adapter->netdev->mtu < new_mtu) {
- DPRINTK(PROBE, INFO,
- "Set MTU on %s to >= %d "
- "before changing MTU on %s\n",
- adapter->netdev->name, new_mtu, dev->name);
- return -EINVAL;
- }
- dev->mtu = new_mtu;
- return 0;
-}
-
-void igb_vmdq_tx_timeout(struct net_device *dev)
-{
- return;
-}
-
-void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct e1000_hw *hw = &adapter->hw;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- vadapter->vlgrp = grp;
-
- igb_enable_vlan_tags(adapter);
- E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0);
-
- return;
-}
-void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
-#ifndef HAVE_NETDEV_VLAN_FEATURES
- struct net_device *v_netdev;
-#endif
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- /* attempt to add filter to vlvf array */
- igb_vlvf_set(adapter, vid, TRUE, hw_queue);
-
-#ifndef HAVE_NETDEV_VLAN_FEATURES
-
- /* Copy feature flags from netdev to the vlan netdev for this vid.
- * This allows things like TSO to bubble down to our vlan device.
- */
- v_netdev = vlan_group_get_device(vadapter->vlgrp, vid);
- v_netdev->features |= adapter->netdev->features;
- vlan_group_set_device(vadapter->vlgrp, vid, v_netdev);
-#endif
-
- return;
-}
-void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- int hw_queue = vadapter->rx_ring->queue_index +
- adapter->vfs_allocated_count;
-
- vlan_group_set_device(vadapter->vlgrp, vid, NULL);
- /* remove vlan from VLVF table array */
- igb_vlvf_set(adapter, vid, FALSE, hw_queue);
-
-
- return;
-}
-
-static int igb_vmdq_get_settings(struct net_device *netdev,
- struct ethtool_cmd *ecmd)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct e1000_hw *hw = &adapter->hw;
- u32 status;
-
- if (hw->phy.media_type == e1000_media_type_copper) {
-
- ecmd->supported = (SUPPORTED_10baseT_Half |
- SUPPORTED_10baseT_Full |
- SUPPORTED_100baseT_Half |
- SUPPORTED_100baseT_Full |
- SUPPORTED_1000baseT_Full|
- SUPPORTED_Autoneg |
- SUPPORTED_TP);
- ecmd->advertising = ADVERTISED_TP;
-
- if (hw->mac.autoneg == 1) {
- ecmd->advertising |= ADVERTISED_Autoneg;
- /* the e1000 autoneg seems to match ethtool nicely */
- ecmd->advertising |= hw->phy.autoneg_advertised;
- }
-
- ecmd->port = PORT_TP;
- ecmd->phy_address = hw->phy.addr;
- } else {
- ecmd->supported = (SUPPORTED_1000baseT_Full |
- SUPPORTED_FIBRE |
- SUPPORTED_Autoneg);
-
- ecmd->advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_FIBRE |
- ADVERTISED_Autoneg);
-
- ecmd->port = PORT_FIBRE;
- }
-
- ecmd->transceiver = XCVR_INTERNAL;
-
- status = E1000_READ_REG(hw, E1000_STATUS);
-
- if (status & E1000_STATUS_LU) {
-
- if ((status & E1000_STATUS_SPEED_1000) ||
- hw->phy.media_type != e1000_media_type_copper)
- ecmd->speed = SPEED_1000;
- else if (status & E1000_STATUS_SPEED_100)
- ecmd->speed = SPEED_100;
- else
- ecmd->speed = SPEED_10;
-
- if ((status & E1000_STATUS_FD) ||
- hw->phy.media_type != e1000_media_type_copper)
- ecmd->duplex = DUPLEX_FULL;
- else
- ecmd->duplex = DUPLEX_HALF;
- } else {
- ecmd->speed = -1;
- ecmd->duplex = -1;
- }
-
- ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
- return 0;
-}
-
-
-static u32 igb_vmdq_get_msglevel(struct net_device *netdev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- return adapter->msg_enable;
-}
-
-static void igb_vmdq_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
- struct igb_adapter *adapter = vadapter->real_adapter;
- struct net_device *main_netdev = adapter->netdev;
-
- strncpy(drvinfo->driver, igb_driver_name, 32);
- strncpy(drvinfo->version, igb_driver_version, 32);
-
- strncpy(drvinfo->fw_version, "N/A", 4);
- snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name,
- vadapter->rx_ring->queue_index);
- drvinfo->n_stats = 0;
- drvinfo->testinfo_len = 0;
- drvinfo->regdump_len = 0;
-}
-
-static void igb_vmdq_get_ringparam(struct net_device *netdev,
- struct ethtool_ringparam *ring)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
-
- struct igb_ring *tx_ring = vadapter->tx_ring;
- struct igb_ring *rx_ring = vadapter->rx_ring;
-
- ring->rx_max_pending = IGB_MAX_RXD;
- ring->tx_max_pending = IGB_MAX_TXD;
- ring->rx_mini_max_pending = 0;
- ring->rx_jumbo_max_pending = 0;
- ring->rx_pending = rx_ring->count;
- ring->tx_pending = tx_ring->count;
- ring->rx_mini_pending = 0;
- ring->rx_jumbo_pending = 0;
-}
-static u32 igb_vmdq_get_rx_csum(struct net_device *netdev)
-{
- struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
- struct igb_adapter *adapter = vadapter->real_adapter;
-
- return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags);
-}
-
-
-static struct ethtool_ops igb_vmdq_ethtool_ops = {
- .get_settings = igb_vmdq_get_settings,
- .get_drvinfo = igb_vmdq_get_drvinfo,
- .get_link = ethtool_op_get_link,
- .get_ringparam = igb_vmdq_get_ringparam,
- .get_rx_csum = igb_vmdq_get_rx_csum,
- .get_tx_csum = ethtool_op_get_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .set_sg = ethtool_op_set_sg,
- .get_msglevel = igb_vmdq_get_msglevel,
-#ifdef NETIF_F_TSO
- .get_tso = ethtool_op_get_tso,
-#endif
-#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
- .get_perm_addr = ethtool_op_get_perm_addr,
-#endif
-};
-
-void igb_vmdq_set_ethtool_ops(struct net_device *netdev)
-{
- SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops);
-}
-
-
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
deleted file mode 100644
index e68c48cf..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IGB_VMDQ_H_
-#define _IGB_VMDQ_H_
-
-#ifdef CONFIG_IGB_VMDQ_NETDEV
-int igb_vmdq_open(struct net_device *dev);
-int igb_vmdq_close(struct net_device *dev);
-netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev);
-struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev);
-void igb_vmdq_set_rx_mode(struct net_device *dev);
-int igb_vmdq_set_mac(struct net_device *dev, void *addr);
-int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu);
-void igb_vmdq_tx_timeout(struct net_device *dev);
-void igb_vmdq_vlan_rx_register(struct net_device *dev,
- struct vlan_group *grp);
-void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
-void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
-void igb_vmdq_set_ethtool_ops(struct net_device *netdev);
-#endif /* CONFIG_IGB_VMDQ_NETDEV */
-#endif /* _IGB_VMDQ_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
deleted file mode 100644
index fd3175b5..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ /dev/null
@@ -1,3933 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _KCOMPAT_H_
-#define _KCOMPAT_H_
-
-#ifndef LINUX_VERSION_CODE
-#include <linux/version.h>
-#else
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
-#endif
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/mii.h>
-#include <linux/vmalloc.h>
-#include <asm/io.h>
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-
-/* NAPI enable/disable flags here */
-#define NAPI
-
-#define adapter_struct igb_adapter
-#define adapter_q_vector igb_q_vector
-#define NAPI
-
-/* and finally set defines so that the code sees the changes */
-#ifdef NAPI
-#else
-#endif /* NAPI */
-
-/* packet split disable/enable */
-#ifdef DISABLE_PACKET_SPLIT
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
-#define CONFIG_IGB_DISABLE_PACKET_SPLIT
-#endif
-#endif /* DISABLE_PACKET_SPLIT */
-
-/* MSI compatibility code for all kernels and drivers */
-#ifdef DISABLE_PCI_MSI
-#undef CONFIG_PCI_MSI
-#endif
-#ifndef CONFIG_PCI_MSI
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
-struct msix_entry {
- u16 vector; /* kernel uses to write allocated vector */
- u16 entry; /* driver uses to specify entry, OS writes */
-};
-#endif
-#undef pci_enable_msi
-#define pci_enable_msi(a) -ENOTSUPP
-#undef pci_disable_msi
-#define pci_disable_msi(a) do {} while (0)
-#undef pci_enable_msix
-#define pci_enable_msix(a, b, c) -ENOTSUPP
-#undef pci_disable_msix
-#define pci_disable_msix(a) do {} while (0)
-#define msi_remove_pci_irq_vectors(a) do {} while (0)
-#endif /* CONFIG_PCI_MSI */
-#ifdef DISABLE_PM
-#undef CONFIG_PM
-#endif
-
-#ifdef DISABLE_NET_POLL_CONTROLLER
-#undef CONFIG_NET_POLL_CONTROLLER
-#endif
-
-#ifndef PMSG_SUSPEND
-#define PMSG_SUSPEND 3
-#endif
-
-/* generic boolean compatibility */
-#undef TRUE
-#undef FALSE
-#define TRUE true
-#define FALSE false
-#ifdef GCC_VERSION
-#if ( GCC_VERSION < 3000 )
-#define _Bool char
-#endif
-#else
-#define _Bool char
-#endif
-
-/* kernels less than 2.4.14 don't have this */
-#ifndef ETH_P_8021Q
-#define ETH_P_8021Q 0x8100
-#endif
-
-#ifndef module_param
-#define module_param(v,t,p) MODULE_PARM(v, "i");
-#endif
-
-#ifndef DMA_64BIT_MASK
-#define DMA_64BIT_MASK 0xffffffffffffffffULL
-#endif
-
-#ifndef DMA_32BIT_MASK
-#define DMA_32BIT_MASK 0x00000000ffffffffULL
-#endif
-
-#ifndef PCI_CAP_ID_EXP
-#define PCI_CAP_ID_EXP 0x10
-#endif
-
-#ifndef PCIE_LINK_STATE_L0S
-#define PCIE_LINK_STATE_L0S 1
-#endif
-#ifndef PCIE_LINK_STATE_L1
-#define PCIE_LINK_STATE_L1 2
-#endif
-
-#ifndef mmiowb
-#ifdef CONFIG_IA64
-#define mmiowb() asm volatile ("mf.a" ::: "memory")
-#else
-#define mmiowb()
-#endif
-#endif
-
-#ifndef SET_NETDEV_DEV
-#define SET_NETDEV_DEV(net, pdev)
-#endif
-
-#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
-#define free_netdev(x) kfree(x)
-#endif
-
-#ifdef HAVE_POLL_CONTROLLER
-#define CONFIG_NET_POLL_CONTROLLER
-#endif
-
-#ifndef SKB_DATAREF_SHIFT
-/* If we do not have the infrastructure to detect whether skb_header is
-   cloned, just return false in all cases */
-#define skb_header_cloned(x) 0
-#endif
-
-#ifndef NETIF_F_GSO
-#define gso_size tso_size
-#define gso_segs tso_segs
-#endif
-
-#ifndef NETIF_F_GRO
-#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
- vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
-#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
-#endif
-
-#ifndef NETIF_F_SCTP_CSUM
-#define NETIF_F_SCTP_CSUM 0
-#endif
-
-#ifndef NETIF_F_LRO
-#define NETIF_F_LRO (1 << 15)
-#endif
-
-#ifndef NETIF_F_NTUPLE
-#define NETIF_F_NTUPLE (1 << 27)
-#endif
-
-#ifndef IPPROTO_SCTP
-#define IPPROTO_SCTP 132
-#endif
-
-#ifndef CHECKSUM_PARTIAL
-#define CHECKSUM_PARTIAL CHECKSUM_HW
-#define CHECKSUM_COMPLETE CHECKSUM_HW
-#endif
-
-#ifndef __read_mostly
-#define __read_mostly
-#endif
-
-#ifndef MII_RESV1
-#define MII_RESV1 0x17 /* Reserved... */
-#endif
-
-#ifndef unlikely
-#define unlikely(_x) _x
-#define likely(_x) _x
-#endif
-
-#ifndef WARN_ON
-#define WARN_ON(x)
-#endif
-
-#ifndef PCI_DEVICE
-#define PCI_DEVICE(vend,dev) \
- .vendor = (vend), .device = (dev), \
- .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
-#endif
-
-#ifndef node_online
-#define node_online(node) ((node) == 0)
-#endif
-
-#ifndef num_online_cpus
-#define num_online_cpus() smp_num_cpus
-#endif
-
-#ifndef cpu_online
-#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
-#endif
-
-#ifndef _LINUX_RANDOM_H
-#include <linux/random.h>
-#endif
-
-#ifndef DECLARE_BITMAP
-#ifndef BITS_TO_LONGS
-#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#endif
-#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
-#endif
-
-#ifndef VLAN_HLEN
-#define VLAN_HLEN 4
-#endif
-
-#ifndef VLAN_ETH_HLEN
-#define VLAN_ETH_HLEN 18
-#endif
-
-#ifndef VLAN_ETH_FRAME_LEN
-#define VLAN_ETH_FRAME_LEN 1518
-#endif
-
-#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
-#define dca_get_tag(b) 0
-#define dca_add_requester(a) -1
-#define dca_remove_requester(b) do { } while(0)
-#define DCA_PROVIDER_ADD 0x0001
-#define DCA_PROVIDER_REMOVE 0x0002
-#endif
-
-#ifndef DCA_GET_TAG_TWO_ARGS
-#define dca3_get_tag(a,b) dca_get_tag(b)
-#endif
-
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#if defined(__i386__) || defined(__x86_64__)
-#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#endif
-#endif
-
-/* taken from 2.6.24 definition in linux/kernel.h */
-#ifndef IS_ALIGNED
-#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0)
-#endif
-
-#ifdef IS_ENABLED
-#undef IS_ENABLED
-#undef __ARG_PLACEHOLDER_1
-#undef config_enabled
-#undef _config_enabled
-#undef __config_enabled
-#undef ___config_enabled
-#endif
-
-#define __ARG_PLACEHOLDER_1 0,
-#define config_enabled(cfg) _config_enabled(cfg)
-#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value)
-#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
-#define ___config_enabled(__ignored, val, ...) val
-
-#define IS_ENABLED(option) \
- (config_enabled(option) || config_enabled(option##_MODULE))
-
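Editor's note: the config_enabled()/IS_ENABLED() block above re-creates the kernel's kconfig.h trick so a CONFIG_ symbol can be tested in an ordinary C expression. The extra level of indirection exists so that a symbol defined to 1 is expanded before the ## paste; the pasted token __ARG_PLACEHOLDER_1 then expands to "0," and shifts a 1 into the value slot. A worked expansion for a hypothetical CONFIG_FOO:

/* Worked expansion for a hypothetical CONFIG_FOO (illustration only).
 *
 * With "#define CONFIG_FOO 1":
 *   config_enabled(CONFIG_FOO)
 *     -> _config_enabled(1)                    (CONFIG_FOO expanded first)
 *     -> __config_enabled(__ARG_PLACEHOLDER_1) (token paste)
 *     -> ___config_enabled(0, 1, 0)            (__ARG_PLACEHOLDER_1 -> "0,")
 *     -> 1                                     (second argument selected)
 *
 * With CONFIG_FOO undefined:
 *   config_enabled(CONFIG_FOO)
 *     -> _config_enabled(CONFIG_FOO)
 *     -> __config_enabled(__ARG_PLACEHOLDER_CONFIG_FOO)
 *     -> ___config_enabled(__ARG_PLACEHOLDER_CONFIG_FOO 1, 0)
 *     -> 0                                     (second argument selected)
 */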
-#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX)
-struct _kc_vlan_ethhdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-#define vlan_ethhdr _kc_vlan_ethhdr
-struct _kc_vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-#define vlan_hdr _kc_vlan_hdr
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#define vlan_tx_tag_present(_skb) 0
-#define vlan_tx_tag_get(_skb) 0
-#endif
-#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */
-
-#ifndef VLAN_PRIO_SHIFT
-#define VLAN_PRIO_SHIFT 13
-#endif
-
-
-#ifndef __GFP_COLD
-#define __GFP_COLD 0
-#endif
-
-#ifndef __GFP_COMP
-#define __GFP_COMP 0
-#endif
-
-/*****************************************************************************/
-/* Installations with ethtool version without eeprom, adapter id, or statistics
- * support */
-
-#ifndef ETH_GSTRING_LEN
-#define ETH_GSTRING_LEN 32
-#endif
-
-#ifndef ETHTOOL_GSTATS
-#define ETHTOOL_GSTATS 0x1d
-#undef ethtool_drvinfo
-#define ethtool_drvinfo k_ethtool_drvinfo
-struct k_ethtool_drvinfo {
- u32 cmd;
- char driver[32];
- char version[32];
- char fw_version[32];
- char bus_info[32];
- char reserved1[32];
- char reserved2[16];
- u32 n_stats;
- u32 testinfo_len;
- u32 eedump_len;
- u32 regdump_len;
-};
-
-struct ethtool_stats {
- u32 cmd;
- u32 n_stats;
- u64 data[0];
-};
-#endif /* ETHTOOL_GSTATS */
-
-#ifndef ETHTOOL_PHYS_ID
-#define ETHTOOL_PHYS_ID 0x1c
-#endif /* ETHTOOL_PHYS_ID */
-
-#ifndef ETHTOOL_GSTRINGS
-#define ETHTOOL_GSTRINGS 0x1b
-enum ethtool_stringset {
- ETH_SS_TEST = 0,
- ETH_SS_STATS,
-};
-struct ethtool_gstrings {
- u32 cmd; /* ETHTOOL_GSTRINGS */
-	u32	string_set;	/* string set id, e.g. ETH_SS_TEST, etc. */
- u32 len; /* number of strings in the string set */
- u8 data[0];
-};
-#endif /* ETHTOOL_GSTRINGS */
-
-#ifndef ETHTOOL_TEST
-#define ETHTOOL_TEST 0x1a
-enum ethtool_test_flags {
- ETH_TEST_FL_OFFLINE = (1 << 0),
- ETH_TEST_FL_FAILED = (1 << 1),
-};
-struct ethtool_test {
- u32 cmd;
- u32 flags;
- u32 reserved;
- u32 len;
- u64 data[0];
-};
-#endif /* ETHTOOL_TEST */
-
-#ifndef ETHTOOL_GEEPROM
-#define ETHTOOL_GEEPROM 0xb
-#undef ETHTOOL_GREGS
-struct ethtool_eeprom {
- u32 cmd;
- u32 magic;
- u32 offset;
- u32 len;
- u8 data[0];
-};
-
-struct ethtool_value {
- u32 cmd;
- u32 data;
-};
-#endif /* ETHTOOL_GEEPROM */
-
-#ifndef ETHTOOL_GLINK
-#define ETHTOOL_GLINK 0xa
-#endif /* ETHTOOL_GLINK */
-
-#ifndef ETHTOOL_GWOL
-#define ETHTOOL_GWOL 0x5
-#define ETHTOOL_SWOL 0x6
-#define SOPASS_MAX 6
-struct ethtool_wolinfo {
- u32 cmd;
- u32 supported;
- u32 wolopts;
- u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
-};
-#endif /* ETHTOOL_GWOL */
-
-#ifndef ETHTOOL_GREGS
-#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */
-#define ethtool_regs _kc_ethtool_regs
-/* for passing big chunks of data */
-struct _kc_ethtool_regs {
- u32 cmd;
- u32 version; /* driver-specific, indicates different chips/revs */
- u32 len; /* bytes */
- u8 data[0];
-};
-#endif /* ETHTOOL_GREGS */
-
-#ifndef ETHTOOL_GMSGLVL
-#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
-#endif
-#ifndef ETHTOOL_SMSGLVL
-#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
-#endif
-#ifndef ETHTOOL_NWAY_RST
-#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */
-#endif
-#ifndef ETHTOOL_GLINK
-#define ETHTOOL_GLINK 0x0000000a /* Get link status */
-#endif
-#ifndef ETHTOOL_GEEPROM
-#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
-#endif
-#ifndef ETHTOOL_SEEPROM
-#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */
-#endif
-#ifndef ETHTOOL_GCOALESCE
-#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
-/* for configuring coalescing parameters of chip */
-#define ethtool_coalesce _kc_ethtool_coalesce
-struct _kc_ethtool_coalesce {
- u32 cmd; /* ETHTOOL_{G,S}COALESCE */
-
- /* How many usecs to delay an RX interrupt after
- * a packet arrives. If 0, only rx_max_coalesced_frames
- * is used.
- */
- u32 rx_coalesce_usecs;
-
- /* How many packets to delay an RX interrupt after
- * a packet arrives. If 0, only rx_coalesce_usecs is
- * used. It is illegal to set both usecs and max frames
- * to zero as this would cause RX interrupts to never be
- * generated.
- */
- u32 rx_max_coalesced_frames;
-
- /* Same as above two parameters, except that these values
- * apply while an IRQ is being serviced by the host. Not
- * all cards support this feature and the values are ignored
- * in that case.
- */
- u32 rx_coalesce_usecs_irq;
- u32 rx_max_coalesced_frames_irq;
-
- /* How many usecs to delay a TX interrupt after
- * a packet is sent. If 0, only tx_max_coalesced_frames
- * is used.
- */
- u32 tx_coalesce_usecs;
-
- /* How many packets to delay a TX interrupt after
- * a packet is sent. If 0, only tx_coalesce_usecs is
- * used. It is illegal to set both usecs and max frames
- * to zero as this would cause TX interrupts to never be
- * generated.
- */
- u32 tx_max_coalesced_frames;
-
- /* Same as above two parameters, except that these values
- * apply while an IRQ is being serviced by the host. Not
- * all cards support this feature and the values are ignored
- * in that case.
- */
- u32 tx_coalesce_usecs_irq;
- u32 tx_max_coalesced_frames_irq;
-
- /* How many usecs to delay in-memory statistics
- * block updates. Some drivers do not have an in-memory
- * statistic block, and in such cases this value is ignored.
- * This value must not be zero.
- */
- u32 stats_block_coalesce_usecs;
-
- /* Adaptive RX/TX coalescing is an algorithm implemented by
- * some drivers to improve latency under low packet rates and
- * improve throughput under high packet rates. Some drivers
- * only implement one of RX or TX adaptive coalescing. Anything
- * not implemented by the driver causes these values to be
- * silently ignored.
- */
- u32 use_adaptive_rx_coalesce;
- u32 use_adaptive_tx_coalesce;
-
- /* When the packet rate (measured in packets per second)
- * is below pkt_rate_low, the {rx,tx}_*_low parameters are
- * used.
- */
- u32 pkt_rate_low;
- u32 rx_coalesce_usecs_low;
- u32 rx_max_coalesced_frames_low;
- u32 tx_coalesce_usecs_low;
- u32 tx_max_coalesced_frames_low;
-
- /* When the packet rate is below pkt_rate_high but above
- * pkt_rate_low (both measured in packets per second) the
- * normal {rx,tx}_* coalescing parameters are used.
- */
-
-	/* When the packet rate (measured in packets per second)
-	 * is above pkt_rate_high, the {rx,tx}_*_high parameters are
- * used.
- */
- u32 pkt_rate_high;
- u32 rx_coalesce_usecs_high;
- u32 rx_max_coalesced_frames_high;
- u32 tx_coalesce_usecs_high;
- u32 tx_max_coalesced_frames_high;
-
- /* How often to do adaptive coalescing packet rate sampling,
- * measured in seconds. Must not be zero.
- */
- u32 rate_sample_interval;
-};
-#endif /* ETHTOOL_GCOALESCE */
-
-#ifndef ETHTOOL_SCOALESCE
-#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
-#endif
-#ifndef ETHTOOL_GRINGPARAM
-#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
-/* for configuring RX/TX ring parameters */
-#define ethtool_ringparam _kc_ethtool_ringparam
-struct _kc_ethtool_ringparam {
- u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
-
- /* Read only attributes. These indicate the maximum number
- * of pending RX/TX ring entries the driver will allow the
- * user to set.
- */
- u32 rx_max_pending;
- u32 rx_mini_max_pending;
- u32 rx_jumbo_max_pending;
- u32 tx_max_pending;
-
- /* Values changeable by the user. The valid values are
- * in the range 1 to the "*_max_pending" counterpart above.
- */
- u32 rx_pending;
- u32 rx_mini_pending;
- u32 rx_jumbo_pending;
- u32 tx_pending;
-};
-#endif /* ETHTOOL_GRINGPARAM */
-
-#ifndef ETHTOOL_SRINGPARAM
-#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
-#endif
-#ifndef ETHTOOL_GPAUSEPARAM
-#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
-/* for configuring link flow control parameters */
-#define ethtool_pauseparam _kc_ethtool_pauseparam
-struct _kc_ethtool_pauseparam {
- u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
-
- /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
- * being true) the user may set 'autoneg' here non-zero to have the
- * pause parameters be auto-negotiated too. In such a case, the
- * {rx,tx}_pause values below determine what capabilities are
- * advertised.
- *
- * If 'autoneg' is zero or the link is not being auto-negotiated,
- * then {rx,tx}_pause force the driver to use/not-use pause
- * flow control.
- */
- u32 autoneg;
- u32 rx_pause;
- u32 tx_pause;
-};
-#endif /* ETHTOOL_GPAUSEPARAM */
-
-#ifndef ETHTOOL_SPAUSEPARAM
-#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
-#endif
-#ifndef ETHTOOL_GRXCSUM
-#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_SRXCSUM
-#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_GTXCSUM
-#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_STXCSUM
-#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_GSG
-#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
- * (ethtool_value) */
-#endif
-#ifndef ETHTOOL_SSG
-#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
- * (ethtool_value). */
-#endif
-#ifndef ETHTOOL_TEST
-#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
-#endif
-#ifndef ETHTOOL_GSTRINGS
-#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
-#endif
-#ifndef ETHTOOL_PHYS_ID
-#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
-#endif
-#ifndef ETHTOOL_GSTATS
-#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
-#endif
-#ifndef ETHTOOL_GTSO
-#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_STSO
-#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
-#endif
-
-#ifndef ETHTOOL_BUSINFO_LEN
-#define ETHTOOL_BUSINFO_LEN 32
-#endif
-
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
-#endif
-#ifndef AX_RELEASE_VERSION
-#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
-#endif
-
-#ifndef AX_RELEASE_CODE
-#define AX_RELEASE_CODE 0
-#endif
-
-#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0))
-#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0)
-#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1))
-#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1)
-#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2))
-#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3)
-#endif
-
-#ifndef RHEL_RELEASE_CODE
-/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
-#define RHEL_RELEASE_CODE 0
-#endif
-
-/* SuSE version macro is the same as Linux kernel version */
-#ifndef SLE_VERSION
-#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
-#endif
-#ifdef CONFIG_SUSE_KERNEL
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
-/* SLES12SP3 is at least 4.4.57+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
-#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
-/* SLES12 is at least 3.12.28+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
-#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)))
-/* SLES11 SP3 is at least 3.0.61+ based */
-#define SLE_VERSION_CODE SLE_VERSION(11,3,0)
-#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
-/* SLES11 SP1 is 2.6.32 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
-#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
-/* SLES11 GA is 2.6.27 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
-#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
-#endif /* CONFIG_SUSE_KERNEL */
-#ifndef SLE_VERSION_CODE
-#define SLE_VERSION_CODE 0
-#endif /* SLE_VERSION_CODE */
-
-/* Ubuntu release and kernel codes must be specified from Makefile */
-#ifndef UBUNTU_RELEASE_VERSION
-#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b))
-#endif
-#ifndef UBUNTU_KERNEL_VERSION
-#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload))
-#endif
-#ifndef UBUNTU_RELEASE_CODE
-#define UBUNTU_RELEASE_CODE 0
-#endif
-#ifndef UBUNTU_KERNEL_CODE
-#define UBUNTU_KERNEL_CODE 0
-#endif
-
-#ifdef __KLOCWORK__
-#ifdef ARRAY_SIZE
-#undef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-#endif /* __KLOCWORK__ */
-
-/*****************************************************************************/
-/* 2.4.3 => 2.4.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-#ifndef pci_set_dma_mask
-#define pci_set_dma_mask _kc_pci_set_dma_mask
-extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
-#endif
-
-#ifndef pci_request_regions
-#define pci_request_regions _kc_pci_request_regions
-extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
-#endif
-
-#ifndef pci_release_regions
-#define pci_release_regions _kc_pci_release_regions
-extern void _kc_pci_release_regions(struct pci_dev *pdev);
-#endif
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-#ifndef alloc_etherdev
-#define alloc_etherdev _kc_alloc_etherdev
-extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
-#endif
-
-#ifndef is_valid_ether_addr
-#define is_valid_ether_addr _kc_is_valid_ether_addr
-extern int _kc_is_valid_ether_addr(u8 *addr);
-#endif
-
-/**************************************/
-/* MISCELLANEOUS */
-
-#ifndef INIT_TQUEUE
-#define INIT_TQUEUE(_tq, _routine, _data) \
- do { \
- INIT_LIST_HEAD(&(_tq)->list); \
- (_tq)->sync = 0; \
- (_tq)->routine = _routine; \
- (_tq)->data = _data; \
- } while (0)
-#endif
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
-/* Generic MII registers. */
-#define MII_BMCR 0x00 /* Basic mode control register */
-#define MII_BMSR 0x01 /* Basic mode status register */
-#define MII_PHYSID1 0x02 /* PHYS ID 1 */
-#define MII_PHYSID2 0x03 /* PHYS ID 2 */
-#define MII_ADVERTISE 0x04 /* Advertisement control reg */
-#define MII_LPA 0x05 /* Link partner ability reg */
-#define MII_EXPANSION 0x06 /* Expansion register */
-/* Basic mode control register. */
-#define BMCR_FULLDPLX 0x0100 /* Full duplex */
-#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
-/* Basic mode status register. */
-#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
-#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
-#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
-#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
-#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
-#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
-/* Advertisement control register. */
-#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
-#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
-#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
-#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
-#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
-#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
- ADVERTISE_100HALF | ADVERTISE_100FULL)
-/* Expansion register for auto-negotiation. */
-#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
-#endif
-
-/*****************************************************************************/
-/* 2.4.6 => 2.4.3 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-#ifndef pci_set_power_state
-#define pci_set_power_state _kc_pci_set_power_state
-extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
-#endif
-
-#ifndef pci_enable_wake
-#define pci_enable_wake _kc_pci_enable_wake
-extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
-#endif
-
-#ifndef pci_disable_device
-#define pci_disable_device _kc_pci_disable_device
-extern void _kc_pci_disable_device(struct pci_dev *pdev);
-#endif
-
-/* PCI PM entry point syntax changed, so don't support suspend/resume */
-#undef CONFIG_PM
-
-#endif /* 2.4.6 => 2.4.3 */
-
-#ifndef HAVE_PCI_SET_MWI
-#define pci_set_mwi(X) pci_write_config_word(X, \
- PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
- PCI_COMMAND_INVALIDATE);
-#define pci_clear_mwi(X) pci_write_config_word(X, \
- PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
- ~PCI_COMMAND_INVALIDATE);
-#endif
-
-/*****************************************************************************/
-/* 2.4.10 => 2.4.9 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
-
-/**************************************/
-/* MODULE API */
-
-#ifndef MODULE_LICENSE
- #define MODULE_LICENSE(X)
-#endif
-
-/**************************************/
-/* OTHER */
-
-#undef min
-#define min(x,y) ({ \
- const typeof(x) _x = (x); \
- const typeof(y) _y = (y); \
- (void) (&_x == &_y); \
- _x < _y ? _x : _y; })
-
-#undef max
-#define max(x,y) ({ \
- const typeof(x) _x = (x); \
- const typeof(y) _y = (y); \
- (void) (&_x == &_y); \
- _x > _y ? _x : _y; })
-
-#define min_t(type,x,y) ({ \
- type _x = (x); \
- type _y = (y); \
- _x < _y ? _x : _y; })
-
-#define max_t(type,x,y) ({ \
- type _x = (x); \
- type _y = (y); \
- _x > _y ? _x : _y; })
-
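Editor's note: the min()/max() replacements above keep the upstream type-safety idiom: the statement `(void) (&_x == &_y);` compares pointers to the two temporaries, so mixing argument types can draw a compiler warning, while min_t()/max_t() sidestep the check by casting both operands. A small hedged example:

/* Illustration only (not part of the deleted source): min() keeps its
 * arguments' types, so comparing a u32 with a size_t may trigger the
 * pointer-comparison warning on builds where size_t is unsigned long;
 * min_t() casts both operands to the named type instead.
 */
static inline u32 clamp_len_example(u32 len, size_t limit)
{
	return min_t(u32, len, limit);
}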
-#ifndef list_for_each_safe
-#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
-#endif
-
-#ifndef ____cacheline_aligned_in_smp
-#ifdef CONFIG_SMP
-#define ____cacheline_aligned_in_smp ____cacheline_aligned
-#else
-#define ____cacheline_aligned_in_smp
-#endif /* CONFIG_SMP */
-#endif
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
-#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
-extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
-#else /* 2.4.8 => 2.4.9 */
-extern int snprintf(char * buf, size_t size, const char *fmt, ...);
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-#endif
-#endif /* 2.4.10 => 2.4.9 */
-
-
-/*****************************************************************************/
-/* 2.4.12 => 2.4.10 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
-#ifndef HAVE_NETIF_MSG
-#define HAVE_NETIF_MSG 1
-enum {
- NETIF_MSG_DRV = 0x0001,
- NETIF_MSG_PROBE = 0x0002,
- NETIF_MSG_LINK = 0x0004,
- NETIF_MSG_TIMER = 0x0008,
- NETIF_MSG_IFDOWN = 0x0010,
- NETIF_MSG_IFUP = 0x0020,
- NETIF_MSG_RX_ERR = 0x0040,
- NETIF_MSG_TX_ERR = 0x0080,
- NETIF_MSG_TX_QUEUED = 0x0100,
- NETIF_MSG_INTR = 0x0200,
- NETIF_MSG_TX_DONE = 0x0400,
- NETIF_MSG_RX_STATUS = 0x0800,
- NETIF_MSG_PKTDATA = 0x1000,
- NETIF_MSG_HW = 0x2000,
- NETIF_MSG_WOL = 0x4000,
-};
-
-#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
-#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
-#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
-#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
-#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
-#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
-#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
-#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
-#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
-#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
-#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
-#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
-#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
-#endif /* !HAVE_NETIF_MSG */
-#endif /* 2.4.12 => 2.4.10 */
-
-/*****************************************************************************/
-/* 2.4.13 => 2.4.12 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#ifndef virt_to_page
- #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
-#endif
-
-#ifndef pci_map_page
-#define pci_map_page _kc_pci_map_page
-extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
-#endif
-
-#ifndef pci_unmap_page
-#define pci_unmap_page _kc_pci_unmap_page
-extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
-#endif
-
-/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
-
-#undef DMA_32BIT_MASK
-#define DMA_32BIT_MASK 0xffffffff
-#undef DMA_64BIT_MASK
-#define DMA_64BIT_MASK 0xffffffff
-
-/**************************************/
-/* OTHER */
-
-#ifndef cpu_relax
-#define cpu_relax() rep_nop()
-#endif
-
-struct vlan_ethhdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- unsigned short h_vlan_proto;
- unsigned short h_vlan_TCI;
- unsigned short h_vlan_encapsulated_proto;
-};
-#endif /* 2.4.13 => 2.4.12 */
-
-/*****************************************************************************/
-/* 2.4.17 => 2.4.12 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
-
-#ifndef __devexit_p
- #define __devexit_p(x) &(x)
-#endif
-
-#else
- /* For Kernel 3.8 these are not defined - so undefine all */
- #undef __devexit_p
- #undef __devexit
- #undef __devinit
- #undef __devinitdata
- #define __devexit_p(x) &(x)
- #define __devexit
- #define __devinit
- #define __devinitdata
-
-#endif /* 2.4.17 => 2.4.12 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
-#define NETIF_MSG_HW 0x2000
-#define NETIF_MSG_WOL 0x4000
-
-#ifndef netif_msg_hw
-#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW)
-#endif
-#ifndef netif_msg_wol
-#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL)
-#endif
-#endif /* 2.4.18 */
-
-/*****************************************************************************/
-
-/*****************************************************************************/
-/* 2.4.20 => 2.4.19 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
-
-/* we won't support NAPI on less than 2.4.20 */
-#ifdef NAPI
-#undef NAPI
-#endif
-
-#endif /* 2.4.20 => 2.4.19 */
-
-/*****************************************************************************/
-/* 2.4.22 => 2.4.17 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
-#define pci_name(x) ((x)->slot_name)
-
-#ifndef SUPPORTED_10000baseT_Full
-#define SUPPORTED_10000baseT_Full (1 << 12)
-#endif
-#ifndef ADVERTISED_10000baseT_Full
-#define ADVERTISED_10000baseT_Full (1 << 12)
-#endif
-#endif
-
-/*****************************************************************************/
-/* 2.4.22 => 2.4.17 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
-#ifndef IGB_NO_LRO
-#define IGB_NO_LRO
-#endif
-#endif
-
-/*****************************************************************************/
-/*****************************************************************************/
-/* 2.4.23 => 2.4.22 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
-/*****************************************************************************/
-#ifdef NAPI
-#ifndef netif_poll_disable
-#define netif_poll_disable(x) _kc_netif_poll_disable(x)
-static inline void _kc_netif_poll_disable(struct net_device *netdev)
-{
- while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
- /* No hurry */
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- }
-}
-#endif
-#ifndef netif_poll_enable
-#define netif_poll_enable(x) _kc_netif_poll_enable(x)
-static inline void _kc_netif_poll_enable(struct net_device *netdev)
-{
- clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
-}
-#endif
-#endif /* NAPI */
-#ifndef netif_tx_disable
-#define netif_tx_disable(x) _kc_netif_tx_disable(x)
-static inline void _kc_netif_tx_disable(struct net_device *dev)
-{
- spin_lock_bh(&dev->xmit_lock);
- netif_stop_queue(dev);
- spin_unlock_bh(&dev->xmit_lock);
-}
-#endif
-#else /* 2.4.23 => 2.4.22 */
-#define HAVE_SCTP
-#endif /* 2.4.23 => 2.4.22 */
-
-/*****************************************************************************/
-/* 2.6.4 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
- ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
-#define ETHTOOL_OPS_COMPAT
-#endif /* 2.6.4 => 2.6.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) )
-#define __user
-#endif /* < 2.4.27 */
-
-/*****************************************************************************/
-/* 2.5.71 => 2.4.x */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
-#define sk_protocol protocol
-#define pci_get_device pci_find_device
-#endif /* 2.5.71 => 2.4.x */
-
-/*****************************************************************************/
-/* < 2.4.27 or 2.6.0 <= 2.6.5 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
- ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
-
-#ifndef netif_msg_init
-#define netif_msg_init _kc_netif_msg_init
-static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
-{
- /* use default */
- if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
- return default_msg_enable_bits;
- if (debug_value == 0) /* no output */
- return 0;
- /* set low N bits */
- return (1 << debug_value) -1;
-}
-#endif
-
-#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
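Editor's note: the _kc_netif_msg_init() fallback above mirrors the upstream helper: a module "debug" value of N enables the N lowest message-class bits, a negative or out-of-range value selects the driver defaults, and 0 silences everything. A hedged usage sketch:

/* Hedged usage sketch (illustration only). */
static u32 example_msg_enable(int debug)
{
	/* debug = 3  -> lowest three bits: NETIF_MSG_DRV|PROBE|LINK */
	/* debug < 0  -> fall back to the defaults passed here       */
	return netif_msg_init(debug,
			      NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK);
}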
-/*****************************************************************************/
-#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
- (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
- ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
-#define netdev_priv(x) x->priv
-#endif
-
-/*****************************************************************************/
-/* <= 2.5.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
-#include <linux/rtnetlink.h>
-#undef pci_register_driver
-#define pci_register_driver pci_module_init
-
-/*
- * Most of the dma compat code is copied/modified from the 2.4.37
- * /include/linux/libata-compat.h header file
- */
-/* These definitions mirror those in pci.h, so they can be used
- * interchangeably with their PCI_ counterparts */
-enum dma_data_direction {
- DMA_BIDIRECTIONAL = 0,
- DMA_TO_DEVICE = 1,
- DMA_FROM_DEVICE = 2,
- DMA_NONE = 3,
-};
-
-struct device {
- struct pci_dev pdev;
-};
-
-static inline struct pci_dev *to_pci_dev (struct device *dev)
-{
- return (struct pci_dev *) dev;
-}
-static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
-{
- return (struct device *) pdev;
-}
-
-#define pdev_printk(lvl, pdev, fmt, args...) \
- printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
-#define dev_err(dev, fmt, args...) \
- pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
-#define dev_info(dev, fmt, args...) \
- pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
-#define dev_warn(dev, fmt, args...) \
- pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
-#define dev_notice(dev, fmt, args...) \
- pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args)
-#define dev_dbg(dev, fmt, args...) \
- pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args)
-
-/* NOTE: dangerous! we ignore the 'gfp' argument */
-#define dma_alloc_coherent(dev,sz,dma,gfp) \
- pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
-#define dma_free_coherent(dev,sz,addr,dma_addr) \
- pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
-
-#define dma_map_page(dev,a,b,c,d) \
- pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
-#define dma_unmap_page(dev,a,b,c) \
- pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
-
-#define dma_map_single(dev,a,b,c) \
- pci_map_single(to_pci_dev(dev),(a),(b),(c))
-#define dma_unmap_single(dev,a,b,c) \
- pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
-
-#define dma_map_sg(dev, sg, nents, dir) \
-	pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir))
-#define dma_unmap_sg(dev, sg, nents, dir) \
-	pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir))
-
-#define dma_sync_single(dev,a,b,c) \
- pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
-
-/* for range just sync everything, that's all the pci API can do */
-#define dma_sync_single_range(dev,addr,off,sz,dir) \
- pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
-
-#define dma_set_mask(dev,mask) \
- pci_set_dma_mask(to_pci_dev(dev),(mask))
-
-/* hlist_* code - double linked lists */
-struct hlist_head {
- struct hlist_node *first;
-};
-
-struct hlist_node {
- struct hlist_node *next, **pprev;
-};
-
-static inline void __hlist_del(struct hlist_node *n)
-{
- struct hlist_node *next = n->next;
- struct hlist_node **pprev = n->pprev;
- *pprev = next;
- if (next)
- next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
- __hlist_del(n);
- n->next = NULL;
- n->pprev = NULL;
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
- struct hlist_node *first = h->first;
- n->next = first;
- if (first)
- first->pprev = &n->next;
- h->first = n;
- n->pprev = &h->first;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
- return !h->first;
-}
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
- h->next = NULL;
- h->pprev = NULL;
-}
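Editor's note: the hlist fallback above is the usual hash-bucket list: pprev points at the previous node's next pointer (or at the bucket head), so a node can be unlinked without knowing which bucket it is on. A minimal hedged usage sketch:

/* Minimal usage sketch for the hlist fallback (illustration only). */
struct example_entry {
	struct hlist_node node;
	int key;
};

static void example_hlist_demo(struct example_entry *e, int key)
{
	HLIST_HEAD(bucket);			/* empty bucket: first == NULL */

	INIT_HLIST_NODE(&e->node);
	e->key = key;
	hlist_add_head(&e->node, &bucket);	/* push at the front           */

	if (!hlist_empty(&bucket))
		hlist_del(&e->node);		/* unlink via node->pprev      */
}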
-
-#ifndef might_sleep
-#define might_sleep()
-#endif
-#else
-static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
-{
- return &pdev->dev;
-}
-#endif /* <= 2.5.0 */
-
-/*****************************************************************************/
-/* 2.5.28 => 2.4.23 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
-
-#include <linux/tqueue.h>
-#define work_struct tq_struct
-#undef INIT_WORK
-#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
-#undef container_of
-#define container_of list_entry
-#define schedule_work schedule_task
-#define flush_scheduled_work flush_scheduled_tasks
-#define cancel_work_sync(x) flush_scheduled_work()
-
-#endif /* 2.5.28 => 2.4.23 */
-
-/*****************************************************************************/
-/* 2.6.0 => 2.5.28 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-#ifndef read_barrier_depends
-#define read_barrier_depends() rmb()
-#endif
-
-#undef get_cpu
-#define get_cpu() smp_processor_id()
-#undef put_cpu
-#define put_cpu() do { } while(0)
-#define MODULE_INFO(version, _version)
-#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
-#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
-#endif
-#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
-#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
-#endif
-
-#define dma_set_coherent_mask(dev,mask) 1
-
-#undef dev_put
-#define dev_put(dev) __dev_put(dev)
-
-#ifndef skb_fill_page_desc
-#define skb_fill_page_desc _kc_skb_fill_page_desc
-extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
-#endif
-
-#undef ALIGN
-#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
-
-#ifndef page_count
-#define page_count(p) atomic_read(&(p)->count)
-#endif
-
-#ifdef MAX_NUMNODES
-#undef MAX_NUMNODES
-#endif
-#define MAX_NUMNODES 1
-
-/* find_first_bit and find_next_bit are not defined for most
- * 2.4 kernels (except for the Red Hat 2.4.21 kernels).
- */
-#include <linux/bitops.h>
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
-#undef find_next_bit
-#define find_next_bit _kc_find_next_bit
-extern unsigned long _kc_find_next_bit(const unsigned long *addr,
- unsigned long size,
- unsigned long offset);
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-
-
-#ifndef netdev_name
-static inline const char *_kc_netdev_name(const struct net_device *dev)
-{
- if (strchr(dev->name, '%'))
- return "(unregistered net_device)";
- return dev->name;
-}
-#define netdev_name(netdev) _kc_netdev_name(netdev)
-#endif /* netdev_name */
-
-#ifndef strlcpy
-#define strlcpy _kc_strlcpy
-extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
-#endif /* strlcpy */
-
-#ifndef do_div
-#if BITS_PER_LONG == 64
-# define do_div(n,base) ({ \
- uint32_t __base = (base); \
- uint32_t __rem; \
- __rem = ((uint64_t)(n)) % __base; \
- (n) = ((uint64_t)(n)) / __base; \
- __rem; \
- })
-#elif BITS_PER_LONG == 32
-extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor);
-# define do_div(n,base) ({ \
- uint32_t __base = (base); \
- uint32_t __rem; \
- if (likely(((n) >> 32) == 0)) { \
- __rem = (uint32_t)(n) % __base; \
- (n) = (uint32_t)(n) / __base; \
- } else \
- __rem = _kc__div64_32(&(n), __base); \
- __rem; \
- })
-#else /* BITS_PER_LONG == ?? */
-# error do_div() does not yet support the C64
-#endif /* BITS_PER_LONG */
-#endif /* do_div */
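-
-/*
- * Usage sketch (illustrative only, not from the original header): do_div()
- * divides its 64-bit lvalue in place and evaluates to the 32-bit remainder,
- * so splitting a nanosecond count into seconds plus leftover nanoseconds
- * could look like this:
- */
-static inline uint32_t _kc_example_split_ns(uint64_t *ns)
-{
-	uint64_t secs = *ns;
-	uint32_t rem = do_div(secs, 1000000000U);	/* secs = quotient, rem = remainder */
-
-	*ns = secs;
-	return rem;
-}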
-
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC 1000000000L
-#endif
-
-#undef HAVE_I2C_SUPPORT
-#else /* 2.6.0 */
-#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \
- (RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9)))
-#define HAVE_I2C_SUPPORT
-#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */
-
-#endif /* 2.6.0 => 2.5.28 */
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )
-#define dma_pool pci_pool
-#define dma_pool_destroy pci_pool_destroy
-#define dma_pool_alloc pci_pool_alloc
-#define dma_pool_free pci_pool_free
-
-#define dma_pool_create(name,dev,size,align,allocation) \
- pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation))
-#endif /* < 2.6.3 */
-
-/*****************************************************************************/
-/* 2.6.4 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
-#endif /* 2.6.4 => 2.6.0 */
-
-/*****************************************************************************/
-/* 2.6.5 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
-#define dma_sync_single_for_cpu dma_sync_single
-#define dma_sync_single_for_device dma_sync_single
-#define dma_sync_single_range_for_cpu dma_sync_single_range
-#define dma_sync_single_range_for_device dma_sync_single_range
-#ifndef pci_dma_mapping_error
-#define pci_dma_mapping_error _kc_pci_dma_mapping_error
-static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
-{
- return dma_addr == 0;
-}
-#endif
-#endif /* 2.6.5 => 2.6.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
-#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
-/* taken from 2.6 include/linux/bitmap.h */
-#undef bitmap_zero
-#define bitmap_zero _kc_bitmap_zero
-static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
-{
- if (nbits <= BITS_PER_LONG)
- *dst = 0UL;
- else {
- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
- memset(dst, 0, len);
- }
-}
-#define random_ether_addr _kc_random_ether_addr
-static inline void _kc_random_ether_addr(u8 *addr)
-{
- get_random_bytes(addr, ETH_ALEN);
- addr[0] &= 0xfe; /* clear multicast */
- addr[0] |= 0x02; /* set local assignment */
-}
-#define page_to_nid(x) 0
-
-#endif /* < 2.6.6 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
-#undef if_mii
-#define if_mii _kc_if_mii
-static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
-{
- return (struct mii_ioctl_data *) &rq->ifr_ifru;
-}
-
-#ifndef __force
-#define __force
-#endif
-#endif /* < 2.6.7 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
-#ifndef PCI_EXP_DEVCTL
-#define PCI_EXP_DEVCTL 8
-#endif
-#ifndef PCI_EXP_DEVCTL_CERE
-#define PCI_EXP_DEVCTL_CERE 0x0001
-#endif
-#define PCI_EXP_FLAGS 2 /* Capabilities register */
-#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */
-#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */
-#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */
-#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */
-#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */
-#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
-#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
-#define PCI_EXP_DEVCAP 4 /* Device capabilities */
-#define PCI_EXP_DEVSTA 10 /* Device Status */
-#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \
- schedule_timeout((x * HZ)/1000 + 2); \
- } while (0)
-
-#endif /* < 2.6.8 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
-#include <net/dsfield.h>
-#define __iomem
-
-#ifndef kcalloc
-#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
-extern void *_kc_kzalloc(size_t size, int flags);
-#endif
-#define MSEC_PER_SEC 1000L
-static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
-{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
-#else
- return (j * MSEC_PER_SEC) / HZ;
-#endif
-}
-static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
-{
- if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return m * (HZ / MSEC_PER_SEC);
-#else
- return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
-#endif
-}
-
-#define msleep_interruptible _kc_msleep_interruptible
-static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
-{
- unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
-
- while (timeout && !signal_pending(current)) {
- __set_current_state(TASK_INTERRUPTIBLE);
- timeout = schedule_timeout(timeout);
- }
- return _kc_jiffies_to_msecs(timeout);
-}
-
-/* Basic mode control register. */
-#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */
-
-#ifndef __le16
-#define __le16 u16
-#endif
-#ifndef __le32
-#define __le32 u32
-#endif
-#ifndef __le64
-#define __le64 u64
-#endif
-#ifndef __be16
-#define __be16 u16
-#endif
-#ifndef __be32
-#define __be32 u32
-#endif
-#ifndef __be64
-#define __be64 u64
-#endif
-
-static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
-{
- return (struct vlan_ethhdr *)skb->mac.raw;
-}
-
-/* Wake-On-Lan options. */
-#define WAKE_PHY (1 << 0)
-#define WAKE_UCAST (1 << 1)
-#define WAKE_MCAST (1 << 2)
-#define WAKE_BCAST (1 << 3)
-#define WAKE_ARP (1 << 4)
-#define WAKE_MAGIC (1 << 5)
-#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
-
-#define skb_header_pointer _kc_skb_header_pointer
-static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
- int offset, int len, void *buffer)
-{
- int hlen = skb_headlen(skb);
-
- if (hlen - offset >= len)
- return skb->data + offset;
-
-#ifdef MAX_SKB_FRAGS
- if (skb_copy_bits(skb, offset, buffer, len) < 0)
- return NULL;
-
- return buffer;
-#else
-	return NULL;
-#endif
-}
-
-#ifndef NETDEV_TX_OK
-#define NETDEV_TX_OK 0
-#endif
-#ifndef NETDEV_TX_BUSY
-#define NETDEV_TX_BUSY 1
-#endif
-#ifndef NETDEV_TX_LOCKED
-#define NETDEV_TX_LOCKED -1
-#endif
-
-#ifndef __bitwise
-#define __bitwise
-#endif
-#endif /* < 2.6.9 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-#ifdef module_param_array_named
-#undef module_param_array_named
-#define module_param_array_named(name, array, type, nump, perm) \
- static struct kparam_array __param_arr_##name \
- = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
- sizeof(array[0]), array }; \
- module_param_call(name, param_array_set, param_array_get, \
- &__param_arr_##name, perm)
-#endif /* module_param_array_named */
-/*
- * num_online_nodes() is broken for all < 2.6.10 kernels.  This is needed to
- * support the Node module parameter of ixgbe.
- */
-#undef num_online_nodes
-#define num_online_nodes(n) 1
-extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
-#undef node_online_map
-#define node_online_map _kcompat_node_online_map
-#define pci_get_class pci_find_class
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
-#define PCI_D0 0
-#define PCI_D1 1
-#define PCI_D2 2
-#define PCI_D3hot 3
-#define PCI_D3cold 4
-typedef int pci_power_t;
-#define pci_choose_state(pdev,state) state
-#define PMSG_SUSPEND 3
-#define PCI_EXP_LNKCTL 16
-
-#undef NETIF_F_LLTX
-
-#ifndef ARCH_HAS_PREFETCH
-#define prefetch(X)
-#endif
-
-#ifndef NET_IP_ALIGN
-#define NET_IP_ALIGN 2
-#endif
-
-#define KC_USEC_PER_SEC 1000000L
-#define usecs_to_jiffies _kc_usecs_to_jiffies
-static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
-{
-#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
- return (KC_USEC_PER_SEC / HZ) * j;
-#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
- return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
-#else
- return (j * KC_USEC_PER_SEC) / HZ;
-#endif
-}
-static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
-{
- if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
- return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
-#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
- return m * (HZ / KC_USEC_PER_SEC);
-#else
- return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
-#endif
-}
-
-#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
-#define PCI_EXP_LNKSTA 18 /* Link Status */
-#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
-#define PCI_EXP_SLTCTL 24 /* Slot Control */
-#define PCI_EXP_SLTSTA 26 /* Slot Status */
-#define PCI_EXP_RTCTL 28 /* Root Control */
-#define PCI_EXP_RTCAP 30 /* Root Capabilities */
-#define PCI_EXP_RTSTA 32 /* Root Status */
-#endif /* < 2.6.11 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
-#include <linux/reboot.h>
-#define USE_REBOOT_NOTIFIER
-
-/* Generic MII registers. */
-#define MII_CTRL1000 0x09 /* 1000BASE-T control */
-#define MII_STAT1000 0x0a /* 1000BASE-T status */
-/* Advertisement control register. */
-#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */
-#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */
-/* Link partner ability register. */
-#define LPA_PAUSE_CAP 0x0400 /* Can pause */
-#define LPA_PAUSE_ASYM 0x0800 /* Can pause asymmetrically */
-/* 1000BASE-T Control register */
-#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */
-#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */
-/* 1000BASE-T Status register */
-#define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */
-#define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */
-
-#ifndef is_zero_ether_addr
-#define is_zero_ether_addr _kc_is_zero_ether_addr
-static inline int _kc_is_zero_ether_addr(const u8 *addr)
-{
- return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
-}
-#endif /* is_zero_ether_addr */
-#ifndef is_multicast_ether_addr
-#define is_multicast_ether_addr _kc_is_multicast_ether_addr
-static inline int _kc_is_multicast_ether_addr(const u8 *addr)
-{
- return addr[0] & 0x01;
-}
-#endif /* is_multicast_ether_addr */
-#endif /* < 2.6.12 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-#ifndef kstrdup
-#define kstrdup _kc_kstrdup
-extern char *_kc_kstrdup(const char *s, unsigned int gfp);
-#endif
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-#define pm_message_t u32
-#ifndef kzalloc
-#define kzalloc _kc_kzalloc
-extern void *_kc_kzalloc(size_t size, int flags);
-#endif
-
-/* Generic MII registers. */
-#define MII_ESTATUS 0x0f /* Extended Status */
-/* Basic mode status register. */
-#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */
-/* Extended status register. */
-#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */
-#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */
-
-#define SUPPORTED_Pause (1 << 13)
-#define SUPPORTED_Asym_Pause (1 << 14)
-#define ADVERTISED_Pause (1 << 13)
-#define ADVERTISED_Asym_Pause (1 << 14)
-
-#if (!(RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
-#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
-#define gfp_t unsigned
-#else
-typedef unsigned gfp_t;
-#endif
-#endif /* !RHEL4.3->RHEL5.0 */
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
-#ifdef CONFIG_X86_64
-#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir) \
- dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir))
-#define dma_sync_single_range_for_device(dev, addr, off, sz, dir) \
- dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir))
-#endif
-#endif
-#endif /* < 2.6.14 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
-#ifndef vmalloc_node
-#define vmalloc_node(a,b) vmalloc(a)
-#endif /* vmalloc_node*/
-
-#define setup_timer(_timer, _function, _data) \
-do { \
- (_timer)->function = _function; \
- (_timer)->data = _data; \
- init_timer(_timer); \
-} while (0)
-#ifndef device_can_wakeup
-#define device_can_wakeup(dev) (1)
-#endif
-#ifndef device_set_wakeup_enable
-#define device_set_wakeup_enable(dev, val) do{}while(0)
-#endif
-#ifndef device_init_wakeup
-#define device_init_wakeup(dev,val) do {} while (0)
-#endif
-static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
-{
- const u16 *a = (const u16 *) addr1;
- const u16 *b = (const u16 *) addr2;
-
- return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
-}
-#undef compare_ether_addr
-#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
-#endif /* < 2.6.15 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
-#undef DEFINE_MUTEX
-#define DEFINE_MUTEX(x) DECLARE_MUTEX(x)
-#define mutex_lock(x) down_interruptible(x)
-#define mutex_unlock(x) up(x)
-
-#ifndef ____cacheline_internodealigned_in_smp
-#ifdef CONFIG_SMP
-#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
-#else
-#define ____cacheline_internodealigned_in_smp
-#endif /* CONFIG_SMP */
-#endif /* ____cacheline_internodealigned_in_smp */
-#undef HAVE_PCI_ERS
-#else /* 2.6.16 and above */
-#undef HAVE_PCI_ERS
-#define HAVE_PCI_ERS
-#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) )
-#ifdef device_can_wakeup
-#undef device_can_wakeup
-#endif /* device_can_wakeup */
-#define device_can_wakeup(dev) 1
-#endif /* SLE_VERSION(10,4,0) */
-#endif /* < 2.6.16 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
-#ifndef dev_notice
-#define dev_notice(dev, fmt, args...) \
- dev_printk(KERN_NOTICE, dev, fmt, ## args)
-#endif
-
-#ifndef first_online_node
-#define first_online_node 0
-#endif
-#ifndef NET_SKB_PAD
-#define NET_SKB_PAD 16
-#endif
-#endif /* < 2.6.17 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
-
-#ifndef IRQ_HANDLED
-#define irqreturn_t void
-#define IRQ_HANDLED
-#define IRQ_NONE
-#endif
-
-#ifndef IRQF_PROBE_SHARED
-#ifdef SA_PROBEIRQ
-#define IRQF_PROBE_SHARED SA_PROBEIRQ
-#else
-#define IRQF_PROBE_SHARED 0
-#endif
-#endif
-
-#ifndef IRQF_SHARED
-#define IRQF_SHARED SA_SHIRQ
-#endif
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-#ifndef FIELD_SIZEOF
-#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
-#endif
-
-#ifndef skb_is_gso
-#ifdef NETIF_F_TSO
-#define skb_is_gso _kc_skb_is_gso
-static inline int _kc_skb_is_gso(const struct sk_buff *skb)
-{
- return skb_shinfo(skb)->gso_size;
-}
-#else
-#define skb_is_gso(a) 0
-#endif
-#endif
-
-#ifndef resource_size_t
-#define resource_size_t unsigned long
-#endif
-
-#ifdef skb_pad
-#undef skb_pad
-#endif
-#define skb_pad(x,y) _kc_skb_pad(x, y)
-int _kc_skb_pad(struct sk_buff *skb, int pad);
-#ifdef skb_padto
-#undef skb_padto
-#endif
-#define skb_padto(x,y) _kc_skb_padto(x, y)
-static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
-{
- unsigned int size = skb->len;
- if(likely(size >= len))
- return 0;
- return _kc_skb_pad(skb, len - size);
-}
-
-#ifndef DECLARE_PCI_UNMAP_ADDR
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
- dma_addr_t ADDR_NAME
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
- u32 LEN_NAME
-#define pci_unmap_addr(PTR, ADDR_NAME) \
- ((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
- (((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME) \
- ((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
- (((PTR)->LEN_NAME) = (VAL))
-#endif /* DECLARE_PCI_UNMAP_ADDR */
-#endif /* < 2.6.18 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0)))
-#define i_private u.generic_ip
-#endif /* >= RHEL 5.0 */
-
-#ifndef DIV_ROUND_UP
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-#endif
-#ifndef __ALIGN_MASK
-#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
-#endif
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
-#if (!((RHEL_RELEASE_CODE && \
- ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
- RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
- (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0))))))
-typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
-#endif
-#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-#undef CONFIG_INET_LRO
-#undef CONFIG_INET_LRO_MODULE
-#ifdef IXGBE_FCOE
-#undef CONFIG_FCOE
-#undef CONFIG_FCOE_MODULE
-#endif /* IXGBE_FCOE */
-#endif
-typedef irqreturn_t (*new_handler_t)(int, void*);
-static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
-#else /* 2.4.x */
-typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
-typedef void (*new_handler_t)(int, void*);
-static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
-#endif /* >= 2.5.x */
-{
- irq_handler_t new_handler = (irq_handler_t) handler;
- return request_irq(irq, new_handler, flags, devname, dev_id);
-}
-
-#undef request_irq
-#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
-
-#define irq_handler_t new_handler_t
-/* pci_restore_state and pci_save_state handle MSI/PCIe from 2.6.19 */
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-#define PCIE_CONFIG_SPACE_LEN 256
-#define PCI_CONFIG_SPACE_LEN 64
-#define PCIE_LINK_STATUS 0x12
-#define pci_config_space_ich8lan() do {} while(0)
-#undef pci_save_state
-extern int _kc_pci_save_state(struct pci_dev *);
-#define pci_save_state(pdev) _kc_pci_save_state(pdev)
-#undef pci_restore_state
-extern void _kc_pci_restore_state(struct pci_dev *);
-#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-#undef free_netdev
-extern void _kc_free_netdev(struct net_device *);
-#define free_netdev(netdev) _kc_free_netdev(netdev)
-#endif
-static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-{
- return 0;
-}
-#define pci_disable_pcie_error_reporting(dev) do {} while (0)
-#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
-
-extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
-#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
-#ifndef bool
-#define bool _Bool
-#define true 1
-#define false 0
-#endif
-#else /* 2.6.19 */
-#include <linux/aer.h>
-#include <linux/string.h>
-#endif /* < 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
-#undef INIT_WORK
-#define INIT_WORK(_work, _func) \
-do { \
- INIT_LIST_HEAD(&(_work)->entry); \
- (_work)->pending = 0; \
- (_work)->func = (void (*)(void *))_func; \
- (_work)->data = _work; \
- init_timer(&(_work)->timer); \
-} while (0)
-#endif
-
-#ifndef PCI_VDEVICE
-#define PCI_VDEVICE(ven, dev) \
- PCI_VENDOR_ID_##ven, (dev), \
- PCI_ANY_ID, PCI_ANY_ID, 0, 0
-#endif
-
-#ifndef PCI_VENDOR_ID_INTEL
-#define PCI_VENDOR_ID_INTEL 0x8086
-#endif
-
-#ifndef round_jiffies
-#define round_jiffies(x) x
-#endif
-
-#define csum_offset csum
-
-#define HAVE_EARLY_VMALLOC_NODE
-#define dev_to_node(dev) -1
-#undef set_dev_node
-/* the (b) = (b) self-assignment removes the compiler warning for the unused variable */
-#define set_dev_node(a, b) do { (b) = (b); } while(0)
-
-#if (!(RHEL_RELEASE_CODE && \
- (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
- !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
-typedef __u16 __bitwise __sum16;
-typedef __u32 __bitwise __wsum;
-#endif
-
-#if (!(RHEL_RELEASE_CODE && \
- (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
- !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
-static inline __wsum csum_unfold(__sum16 n)
-{
- return (__force __wsum)n;
-}
-#endif
-
-#else /* < 2.6.20 */
-#define HAVE_DEVICE_NUMA_NODE
-#endif /* < 2.6.20 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-#define to_net_dev(class) container_of(class, struct net_device, class_dev)
-#define NETDEV_CLASS_DEV
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
-#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
-#define vlan_group_set_device(vg, id, dev) \
- do { \
- if (vg) vg->vlan_devices[id] = dev; \
- } while (0)
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
-#define pci_channel_offline(pdev) (pdev->error_state && \
- pdev->error_state != pci_channel_io_normal)
-#define pci_request_selected_regions(pdev, bars, name) \
- pci_request_regions(pdev, name)
-#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev)
-
-#ifndef __aligned
-#define __aligned(x) __attribute__((aligned(x)))
-#endif
-
-extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev);
-#define netdev_to_dev(netdev) \
- pci_dev_to_dev(_kc_netdev_to_pdev(netdev))
-#else
-static inline struct device *netdev_to_dev(struct net_device *netdev)
-{
- return &netdev->dev;
-}
-
-#endif /* < 2.6.21 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-#define tcp_hdr(skb) (skb->h.th)
-#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
-#define skb_transport_offset(skb) (skb->h.raw - skb->data)
-#define skb_transport_header(skb) (skb->h.raw)
-#define ipv6_hdr(skb) (skb->nh.ipv6h)
-#define ip_hdr(skb) (skb->nh.iph)
-#define skb_network_offset(skb) (skb->nh.raw - skb->data)
-#define skb_network_header(skb) (skb->nh.raw)
-#define skb_tail_pointer(skb) skb->tail
-#define skb_reset_tail_pointer(skb) \
- do { \
- skb->tail = skb->data; \
- } while (0)
-#define skb_set_tail_pointer(skb, offset) \
- do { \
- skb->tail = skb->data + offset; \
- } while (0)
-#define skb_copy_to_linear_data(skb, from, len) \
- memcpy(skb->data, from, len)
-#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
- memcpy(skb->data + offset, from, len)
-#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
-#define pci_register_driver pci_module_init
-#define skb_mac_header(skb) skb->mac.raw
-
-#ifdef NETIF_F_MULTI_QUEUE
-#ifndef alloc_etherdev_mq
-#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
-#endif
-#endif /* NETIF_F_MULTI_QUEUE */
-
-#ifndef ETH_FCS_LEN
-#define ETH_FCS_LEN 4
-#endif
-#define cancel_work_sync(x) flush_scheduled_work()
-#ifndef udp_hdr
-#define udp_hdr _udp_hdr
-static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
-{
- return (struct udphdr *)skb_transport_header(skb);
-}
-#endif
-
-#ifdef cpu_to_be16
-#undef cpu_to_be16
-#endif
-#define cpu_to_be16(x) __constant_htons(x)
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
-enum {
- DUMP_PREFIX_NONE,
- DUMP_PREFIX_ADDRESS,
- DUMP_PREFIX_OFFSET
-};
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
-#ifndef hex_asc
-#define hex_asc(x) "0123456789abcdef"[x]
-#endif
-#include <linux/ctype.h>
-extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
- int prefix_type, int rowsize, int groupsize,
- const void *buf, size_t len, bool ascii);
-#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
- _kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
-#ifndef ADVERTISED_2500baseX_Full
-#define ADVERTISED_2500baseX_Full (1 << 15)
-#endif
-#ifndef SUPPORTED_2500baseX_Full
-#define SUPPORTED_2500baseX_Full (1 << 15)
-#endif
-
-#ifdef HAVE_I2C_SUPPORT
-#include <linux/i2c.h>
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
-struct i2c_board_info {
- char driver_name[KOBJ_NAME_LEN];
- char type[I2C_NAME_SIZE];
- unsigned short flags;
- unsigned short addr;
- void *platform_data;
-};
-#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\
- .addr = (dev_addr)
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
-#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info)
-extern struct i2c_client *
-_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info);
-#endif /* HAVE_I2C_SUPPORT */
-
-#else /* 2.6.22 */
-#define ETH_TYPE_TRANS_SETS_DEV
-#define HAVE_NETDEV_STATS_IN_NETDEV
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
-#undef SET_MODULE_OWNER
-#define SET_MODULE_OWNER(dev) do { } while (0)
-#endif /* > 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
-#define netif_subqueue_stopped(_a, _b) 0
-#ifndef PTR_ALIGN
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
-#endif
-
-#ifndef CONFIG_PM_SLEEP
-#define CONFIG_PM_SLEEP CONFIG_PM
-#endif
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
-#define HAVE_ETHTOOL_GET_PERM_ADDR
-#endif /* 2.6.14 through 2.6.22 */
-#endif /* < 2.6.23 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifndef ETH_FLAG_LRO
-#define ETH_FLAG_LRO NETIF_F_LRO
-#endif
-
-/* if GRO is supported then the napi struct must already exist */
-#ifndef NETIF_F_GRO
-/* the 2.6.24 NAPI rework moved polling from net_device into napi_struct, breaking the old interface */
-struct napi_struct {
- /* used to look up the real NAPI polling routine */
- int (*poll)(struct napi_struct *, int);
- struct net_device *dev;
- int weight;
-};
-#endif
-
-#ifdef NAPI
-extern int __kc_adapter_clean(struct net_device *, int *);
-extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi);
-#define netif_napi_add(_netdev, _napi, _poll, _weight) \
- do { \
- struct napi_struct *__napi = (_napi); \
- struct net_device *poll_dev = napi_to_poll_dev(__napi); \
- poll_dev->poll = &(__kc_adapter_clean); \
- poll_dev->priv = (_napi); \
- poll_dev->weight = (_weight); \
- set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
- set_bit(__LINK_STATE_START, &poll_dev->state);\
- dev_hold(poll_dev); \
- __napi->poll = &(_poll); \
- __napi->weight = (_weight); \
- __napi->dev = (_netdev); \
- } while (0)
-#define netif_napi_del(_napi) \
- do { \
- struct net_device *poll_dev = napi_to_poll_dev(_napi); \
- WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
- dev_put(poll_dev); \
- memset(poll_dev, 0, sizeof(struct net_device));\
- } while (0)
-#define napi_schedule_prep(_napi) \
- (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
-#define napi_schedule(_napi) \
- do { \
- if (napi_schedule_prep(_napi)) \
- __netif_rx_schedule(napi_to_poll_dev(_napi)); \
- } while (0)
-#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
-#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
-#ifdef CONFIG_SMP
-static inline void napi_synchronize(const struct napi_struct *n)
-{
- struct net_device *dev = napi_to_poll_dev(n);
-
- while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
- /* No hurry. */
- msleep(1);
- }
-}
-#else
-#define napi_synchronize(n) barrier()
-#endif /* CONFIG_SMP */
-#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
-#ifndef NETIF_F_GRO
-#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
-#else
-#define napi_complete(_napi) \
- do { \
- napi_gro_flush(_napi); \
- netif_rx_complete(napi_to_poll_dev(_napi)); \
- } while (0)
-#endif /* NETIF_F_GRO */
-#else /* NAPI */
-#define netif_napi_add(_netdev, _napi, _poll, _weight) \
- do { \
- struct napi_struct *__napi = _napi; \
- _netdev->poll = &(_poll); \
- _netdev->weight = (_weight); \
- __napi->poll = &(_poll); \
- __napi->weight = (_weight); \
- __napi->dev = (_netdev); \
- } while (0)
-#define netif_napi_del(_a) do {} while (0)
-#endif /* NAPI */
-
-#undef dev_get_by_name
-#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
-#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
-#ifndef DMA_BIT_MASK
-#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
-#endif
-
-#ifdef NETIF_F_TSO6
-#define skb_is_gso_v6 _kc_skb_is_gso_v6
-static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
-{
- return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
-}
-#endif /* NETIF_F_TSO6 */
-
-#ifndef KERN_CONT
-#define KERN_CONT ""
-#endif
-#ifndef pr_err
-#define pr_err(fmt, arg...) \
- printk(KERN_ERR fmt, ##arg)
-#endif
-#else /* < 2.6.24 */
-#define HAVE_ETHTOOL_GET_SSET_COUNT
-#define HAVE_NETDEV_NAPI_LIST
-#endif /* < 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
-#include <linux/pm_qos_params.h>
-#else /* >= 3.2.0 */
-#include <linux/pm_qos.h>
-#endif /* else >= 3.2.0 */
-#endif /* > 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
-#define PM_QOS_CPU_DMA_LATENCY 1
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
-#include <linux/latency.h>
-#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY
-#define pm_qos_add_requirement(pm_qos_class, name, value) \
- set_acceptable_latency(name, value)
-#define pm_qos_remove_requirement(pm_qos_class, name) \
- remove_acceptable_latency(name)
-#define pm_qos_update_requirement(pm_qos_class, name, value) \
- modify_acceptable_latency(name, value)
-#else
-#define PM_QOS_DEFAULT_VALUE -1
-#define pm_qos_add_requirement(pm_qos_class, name, value)
-#define pm_qos_remove_requirement(pm_qos_class, name)
-#define pm_qos_update_requirement(pm_qos_class, name, value) { \
- if (value != PM_QOS_DEFAULT_VALUE) { \
- printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
- pci_name(adapter->pdev)); \
- } \
-}
-
-#endif /* > 2.6.18 */
-
-#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
-
-#ifndef DEFINE_PCI_DEVICE_TABLE
-#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
-#endif /* DEFINE_PCI_DEVICE_TABLE */
-
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-#ifndef IGB_PROCFS
-#define IGB_PROCFS
-#endif /* IGB_PROCFS */
-#endif /* >= 2.6.0 */
-
-#else /* < 2.6.25 */
-
-
-#if IS_ENABLED(CONFIG_HWMON)
-#ifndef IGB_HWMON
-#define IGB_HWMON
-#endif /* IGB_HWMON */
-#endif /* CONFIG_HWMON */
-
-#endif /* < 2.6.25 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-#ifndef clamp_t
-#define clamp_t(type, val, min, max) ({ \
- type __val = (val); \
- type __min = (min); \
- type __max = (max); \
- __val = __val < __min ? __min : __val; \
- __val > __max ? __max : __val; })
-#endif /* clamp_t */
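-
-/*
- * Illustrative sketch (not from the original header): clamp_t() bounds a
- * value into [min, max] using an explicit type, e.g. an interrupt rate:
- */
-static inline u16 _kc_example_clamp_itr(u16 itr)
-{
-	return clamp_t(u16, itr, 10, 200);	/* keep itr within 10..200 */
-}
-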
-#undef kzalloc_node
-#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
-
-extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
-#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
-#else /* < 2.6.26 */
-#include <linux/pci-aspm.h>
-#define HAVE_NETDEV_VLAN_FEATURES
-#ifndef PCI_EXP_LNKCAP_ASPMS
-#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */
-#endif /* PCI_EXP_LNKCAP_ASPMS */
-#endif /* < 2.6.26 */
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
- __u32 speed)
-{
- ep->speed = (__u16)speed;
- /* ep->speed_hi = (__u16)(speed >> 16); */
-}
-#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
-
-static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
-{
- /* no speed_hi before 2.6.27, and probably no need for it yet */
- return (__u32)ep->speed;
-}
-#define ethtool_cmd_speed _kc_ethtool_cmd_speed
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
-#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
-#define ANCIENT_PM 1
-#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
- defined(CONFIG_PM_SLEEP))
-#define NEWER_PM 1
-#endif
-#if defined(ANCIENT_PM) || defined(NEWER_PM)
-#undef device_set_wakeup_enable
-#define device_set_wakeup_enable(dev, val) \
- do { \
- u16 pmc = 0; \
- int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
- if (pm) { \
- pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
- &pmc); \
- } \
- (dev)->power.can_wakeup = !!(pmc >> 11); \
- (dev)->power.should_wakeup = (val && (pmc >> 11)); \
- } while (0)
-#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
-#endif /* 2.6.15 through 2.6.27 */
-#ifndef netif_napi_del
-#define netif_napi_del(_a) do {} while (0)
-#ifdef NAPI
-#ifdef CONFIG_NETPOLL
-#undef netif_napi_del
-#define netif_napi_del(_a) list_del(&(_a)->dev_list)
-#endif
-#endif
-#endif /* netif_napi_del */
-#ifdef dma_mapping_error
-#undef dma_mapping_error
-#endif
-#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
-
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
-#define HAVE_TX_MQ
-#endif
-
-#ifdef HAVE_TX_MQ
-extern void _kc_netif_tx_stop_all_queues(struct net_device *);
-extern void _kc_netif_tx_wake_all_queues(struct net_device *);
-extern void _kc_netif_tx_start_all_queues(struct net_device *);
-#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
-#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
-#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
-#undef netif_stop_subqueue
-#define netif_stop_subqueue(_ndev,_qi) do { \
- if (netif_is_multiqueue((_ndev))) \
- netif_stop_subqueue((_ndev), (_qi)); \
- else \
- netif_stop_queue((_ndev)); \
- } while (0)
-#undef netif_start_subqueue
-#define netif_start_subqueue(_ndev,_qi) do { \
- if (netif_is_multiqueue((_ndev))) \
- netif_start_subqueue((_ndev), (_qi)); \
- else \
- netif_start_queue((_ndev)); \
- } while (0)
-#else /* HAVE_TX_MQ */
-#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
-#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
-#define netif_tx_start_all_queues(a) netif_start_queue(a)
-#else
-#define netif_tx_start_all_queues(a) do {} while (0)
-#endif
-#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
-#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
-#endif /* HAVE_TX_MQ */
-#ifndef NETIF_F_MULTI_QUEUE
-#define NETIF_F_MULTI_QUEUE 0
-#define netif_is_multiqueue(a) 0
-#define netif_wake_subqueue(a, b)
-#endif /* NETIF_F_MULTI_QUEUE */
-
-#ifndef __WARN_printf
-extern void __kc_warn_slowpath(const char *file, const int line,
- const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
-#endif /* __WARN_printf */
-
-#ifndef WARN
-#define WARN(condition, format...) ({ \
- int __ret_warn_on = !!(condition); \
- if (unlikely(__ret_warn_on)) \
- __WARN_printf(format); \
- unlikely(__ret_warn_on); \
-})
-#endif /* WARN */
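-
-/*
- * Illustrative sketch (not from the original header): the WARN() fallback
- * above evaluates to its condition, so it can both log and gate an error
- * path, e.g.:
- */
-static inline int _kc_example_check_ring_size(unsigned int count,
-					      unsigned int max_count)
-{
-	if (WARN(count > max_count, "ring size %u exceeds max %u\n",
-		 count, max_count))
-		return -1;
-	return 0;
-}
-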
-#undef HAVE_IXGBE_DEBUG_FS
-#undef HAVE_IGB_DEBUG_FS
-#else /* < 2.6.27 */
-#define HAVE_TX_MQ
-#define HAVE_NETDEV_SELECT_QUEUE
-#ifdef CONFIG_DEBUG_FS
-#define HAVE_IXGBE_DEBUG_FS
-#define HAVE_IGB_DEBUG_FS
-#endif /* CONFIG_DEBUG_FS */
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \
- pci_resource_len(pdev, bar))
-#define pci_wake_from_d3 _kc_pci_wake_from_d3
-#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
-extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
-extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
-#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
-#ifndef __skb_queue_head_init
-static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
-{
- list->prev = list->next = (struct sk_buff *)list;
- list->qlen = 0;
-}
-#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
-#endif
-
-#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
-#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
-
-#endif /* < 2.6.28 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
-#ifndef swap
-#define swap(a, b) \
- do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-#endif
-#define pci_request_selected_regions_exclusive(pdev, bars, name) \
- pci_request_selected_regions(pdev, bars, name)
-#ifndef CONFIG_NR_CPUS
-#define CONFIG_NR_CPUS 1
-#endif /* CONFIG_NR_CPUS */
-#ifndef pcie_aspm_enabled
-#define pcie_aspm_enabled() (1)
-#endif /* pcie_aspm_enabled */
-
-#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */
-
-#ifndef pci_clear_master
-extern void _kc_pci_clear_master(struct pci_dev *dev);
-#define pci_clear_master(dev) _kc_pci_clear_master(dev)
-#endif
-
-#ifndef PCI_EXP_LNKCTL_ASPMC
-#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */
-#endif
-#else /* < 2.6.29 */
-#ifndef HAVE_NET_DEVICE_OPS
-#define HAVE_NET_DEVICE_OPS
-#endif
-#ifdef CONFIG_DCB
-#define HAVE_PFC_MODE_ENABLE
-#endif /* CONFIG_DCB */
-#endif /* < 2.6.29 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
-#define skb_rx_queue_recorded(a) false
-#define skb_get_rx_queue(a) 0
-#define skb_record_rx_queue(a, b) do {} while (0)
-#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues)
-#ifndef CONFIG_PCI_IOV
-#undef pci_enable_sriov
-#define pci_enable_sriov(a, b) -ENOTSUPP
-#undef pci_disable_sriov
-#define pci_disable_sriov(a) do {} while (0)
-#endif /* CONFIG_PCI_IOV */
-#ifndef pr_cont
-#define pr_cont(fmt, ...) \
- printk(KERN_CONT fmt, ##__VA_ARGS__)
-#endif /* pr_cont */
-static inline void _kc_synchronize_irq(unsigned int a)
-{
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
- synchronize_irq();
-#else /* < 2.5.28 */
- synchronize_irq(a);
-#endif /* < 2.5.28 */
-}
-#undef synchronize_irq
-#define synchronize_irq(a) _kc_synchronize_irq(a)
-
-#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */
-
-#else /* < 2.6.30 */
-#define HAVE_ASPM_QUIRKS
-#endif /* < 2.6.30 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
-#define ETH_P_1588 0x88F7
-#define ETH_P_FIP 0x8914
-#ifndef netdev_uc_count
-#define netdev_uc_count(dev) ((dev)->uc_count)
-#endif
-#ifndef netdev_for_each_uc_addr
-#define netdev_for_each_uc_addr(uclist, dev) \
- for (uclist = dev->uc_list; uclist; uclist = uclist->next)
-#endif
-#ifndef PORT_OTHER
-#define PORT_OTHER 0xff
-#endif
-#ifndef MDIO_PHY_ID_PRTAD
-#define MDIO_PHY_ID_PRTAD 0x03e0
-#endif
-#ifndef MDIO_PHY_ID_DEVAD
-#define MDIO_PHY_ID_DEVAD 0x001f
-#endif
-#ifndef skb_dst
-#define skb_dst(s) ((s)->dst)
-#endif
-
-#ifndef SUPPORTED_1000baseKX_Full
-#define SUPPORTED_1000baseKX_Full (1 << 17)
-#endif
-#ifndef SUPPORTED_10000baseKX4_Full
-#define SUPPORTED_10000baseKX4_Full (1 << 18)
-#endif
-#ifndef SUPPORTED_10000baseKR_Full
-#define SUPPORTED_10000baseKR_Full (1 << 19)
-#endif
-
-#ifndef ADVERTISED_1000baseKX_Full
-#define ADVERTISED_1000baseKX_Full (1 << 17)
-#endif
-#ifndef ADVERTISED_10000baseKX4_Full
-#define ADVERTISED_10000baseKX4_Full (1 << 18)
-#endif
-#ifndef ADVERTISED_10000baseKR_Full
-#define ADVERTISED_10000baseKR_Full (1 << 19)
-#endif
-
-#else /* < 2.6.31 */
-#ifndef HAVE_NETDEV_STORAGE_ADDRESS
-#define HAVE_NETDEV_STORAGE_ADDRESS
-#endif
-#ifndef HAVE_NETDEV_HW_ADDR
-#define HAVE_NETDEV_HW_ADDR
-#endif
-#ifndef HAVE_TRANS_START_IN_QUEUE
-#define HAVE_TRANS_START_IN_QUEUE
-#endif
-#ifndef HAVE_INCLUDE_LINUX_MDIO_H
-#define HAVE_INCLUDE_LINUX_MDIO_H
-#endif
-#endif /* < 2.6.31 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
-#undef netdev_tx_t
-#define netdev_tx_t int
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef NETIF_F_FCOE_MTU
-#define NETIF_F_FCOE_MTU (1 << 26)
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-static inline int _kc_pm_runtime_get_sync(void)
-{
- return 1;
-}
-#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync()
-#else /* 2.6.0 => 2.6.32 */
-static inline int _kc_pm_runtime_get_sync(struct device *dev)
-{
- return 1;
-}
-#ifndef pm_runtime_get_sync
-#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync(dev)
-#endif
-#endif /* 2.6.0 => 2.6.32 */
-#ifndef pm_runtime_put
-#define pm_runtime_put(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_put_sync
-#define pm_runtime_put_sync(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_resume
-#define pm_runtime_resume(dev) do {} while (0)
-#endif
-#ifndef pm_schedule_suspend
-#define pm_schedule_suspend(dev, t) do {} while (0)
-#endif
-#ifndef pm_runtime_set_suspended
-#define pm_runtime_set_suspended(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_disable
-#define pm_runtime_disable(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_put_noidle
-#define pm_runtime_put_noidle(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_set_active
-#define pm_runtime_set_active(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_enable
-#define pm_runtime_enable(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_get_noresume
-#define pm_runtime_get_noresume(dev) do {} while (0)
-#endif
-#else /* < 2.6.32 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
-#define HAVE_NETDEV_OPS_FCOE_ENABLE
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#ifdef CONFIG_DCB
-#ifndef HAVE_DCBNL_OPS_GETAPP
-#define HAVE_DCBNL_OPS_GETAPP
-#endif
-#endif /* CONFIG_DCB */
-#include <linux/pm_runtime.h>
-/* IOV bad-DMA-target workarounds require at least this kernel revision */
-#define HAVE_PCIE_TYPE
-#endif /* < 2.6.32 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
-#ifndef pci_pcie_cap
-#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
-#endif
-#ifndef IPV4_FLOW
-#define IPV4_FLOW 0x10
-#endif /* IPV4_FLOW */
-#ifndef IPV6_FLOW
-#define IPV6_FLOW 0x11
-#endif /* IPV6_FLOW */
-/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
-#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
- (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
-#define HAVE_NETDEV_OPS_FCOE_GETWWN
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#endif /* RHEL6 or SLES11 SP1 */
-#ifndef __percpu
-#define __percpu
-#endif /* __percpu */
-#ifndef PORT_DA
-#define PORT_DA PORT_OTHER
-#endif
-#ifndef PORT_NONE
-#define PORT_NONE PORT_OTHER
-#endif
-
-#if ((RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))
-#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE)
-#undef DEFINE_DMA_UNMAP_ADDR
-#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
-#undef DEFINE_DMA_UNMAP_LEN
-#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME
-#undef dma_unmap_addr
-#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME)
-#undef dma_unmap_addr_set
-#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL))
-#undef dma_unmap_len
-#define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME)
-#undef dma_unmap_len_set
-#define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL))
-#endif /* !CONFIG_X86_32 && !CONFIG_NEED_DMA_MAP_STATE */
-#endif /* RHEL_RELEASE_CODE */
-
-#if (!(RHEL_RELEASE_CODE && \
- (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \
- ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))))
-static inline bool pci_is_pcie(struct pci_dev *dev)
-{
- return !!pci_pcie_cap(dev);
-}
-#endif /* RHEL_RELEASE_CODE */
-
-#ifndef __always_unused
-#define __always_unused __attribute__((__unused__))
-#endif
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((__unused__))
-#endif
-
-#if (!(RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))))
-#define sk_tx_queue_get(_sk) (-1)
-#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0)
-#endif /* !(RHEL >= 6.2) */
-
-#if (RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
-#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
-#define HAVE_ETHTOOL_SET_PHYS_ID
-#define HAVE_ETHTOOL_GET_TS_INFO
-#endif /* RHEL >= 6.4 && RHEL < 7.0 */
-
-#if (RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
-#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB
-#endif /* RHEL >= 6.5 && RHEL < 7.0 */
-
-#else /* < 2.6.33 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
-#define HAVE_NETDEV_OPS_FCOE_GETWWN
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#endif /* < 2.6.33 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
-#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-#ifndef pci_num_vf
-#define pci_num_vf(pdev) _kc_pci_num_vf(pdev)
-extern int _kc_pci_num_vf(struct pci_dev *dev);
-#endif
-#endif /* RHEL_RELEASE_CODE */
-
-#ifndef ETH_FLAG_NTUPLE
-#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
-#endif
-
-#ifndef netdev_mc_count
-#define netdev_mc_count(dev) ((dev)->mc_count)
-#endif
-#ifndef netdev_mc_empty
-#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
-#endif
-#ifndef netdev_for_each_mc_addr
-#define netdev_for_each_mc_addr(mclist, dev) \
- for (mclist = dev->mc_list; mclist; mclist = mclist->next)
-#endif
-#ifndef netdev_uc_count
-#define netdev_uc_count(dev) ((dev)->uc.count)
-#endif
-#ifndef netdev_uc_empty
-#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
-#endif
-#ifndef netdev_for_each_uc_addr
-#define netdev_for_each_uc_addr(ha, dev) \
- list_for_each_entry(ha, &dev->uc.list, list)
-#endif
-#ifndef dma_set_coherent_mask
-#define dma_set_coherent_mask(dev,mask) \
- pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
-#endif
-#ifndef pci_dev_run_wake
-#define pci_dev_run_wake(pdev) (0)
-#endif
-
-/* netdev logging taken from include/linux/netdevice.h */
-#ifndef netdev_name
-static inline const char *_kc_netdev_name(const struct net_device *dev)
-{
- if (dev->reg_state != NETREG_REGISTERED)
- return "(unregistered net_device)";
- return dev->name;
-}
-#define netdev_name(netdev) _kc_netdev_name(netdev)
-#endif /* netdev_name */
-
-#undef netdev_printk
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-#define netdev_printk(level, netdev, format, args...) \
-do { \
- struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \
- printk(level "%s: " format, pci_name(pdev), ##args); \
-} while(0)
-#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-#define netdev_printk(level, netdev, format, args...) \
-do { \
- struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \
- struct device *dev = pci_dev_to_dev(pdev); \
- dev_printk(level, dev, "%s: " format, \
- netdev_name(netdev), ##args); \
-} while(0)
-#else /* 2.6.21 => 2.6.34 */
-#define netdev_printk(level, netdev, format, args...) \
- dev_printk(level, (netdev)->dev.parent, \
- "%s: " format, \
- netdev_name(netdev), ##args)
-#endif /* <2.6.0 <2.6.21 <2.6.34 */
-#undef netdev_emerg
-#define netdev_emerg(dev, format, args...) \
- netdev_printk(KERN_EMERG, dev, format, ##args)
-#undef netdev_alert
-#define netdev_alert(dev, format, args...) \
- netdev_printk(KERN_ALERT, dev, format, ##args)
-#undef netdev_crit
-#define netdev_crit(dev, format, args...) \
- netdev_printk(KERN_CRIT, dev, format, ##args)
-#undef netdev_err
-#define netdev_err(dev, format, args...) \
- netdev_printk(KERN_ERR, dev, format, ##args)
-#undef netdev_warn
-#define netdev_warn(dev, format, args...) \
- netdev_printk(KERN_WARNING, dev, format, ##args)
-#undef netdev_notice
-#define netdev_notice(dev, format, args...) \
- netdev_printk(KERN_NOTICE, dev, format, ##args)
-#undef netdev_info
-#define netdev_info(dev, format, args...) \
- netdev_printk(KERN_INFO, dev, format, ##args)
-#undef netdev_dbg
-#if defined(DEBUG)
-#define netdev_dbg(__dev, format, args...) \
- netdev_printk(KERN_DEBUG, __dev, format, ##args)
-#elif defined(CONFIG_DYNAMIC_DEBUG)
-#define netdev_dbg(__dev, format, args...) \
-do { \
- dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \
- netdev_name(__dev), ##args); \
-} while (0)
-#else /* DEBUG */
-#define netdev_dbg(__dev, format, args...) \
-({ \
- if (0) \
- netdev_printk(KERN_DEBUG, __dev, format, ##args); \
- 0; \
-})
-#endif /* DEBUG */
-
-#undef netif_printk
-#define netif_printk(priv, type, level, dev, fmt, args...) \
-do { \
- if (netif_msg_##type(priv)) \
- netdev_printk(level, (dev), fmt, ##args); \
-} while (0)
-
-#undef netif_emerg
-#define netif_emerg(priv, type, dev, fmt, args...) \
- netif_level(emerg, priv, type, dev, fmt, ##args)
-#undef netif_alert
-#define netif_alert(priv, type, dev, fmt, args...) \
- netif_level(alert, priv, type, dev, fmt, ##args)
-#undef netif_crit
-#define netif_crit(priv, type, dev, fmt, args...) \
- netif_level(crit, priv, type, dev, fmt, ##args)
-#undef netif_err
-#define netif_err(priv, type, dev, fmt, args...) \
- netif_level(err, priv, type, dev, fmt, ##args)
-#undef netif_warn
-#define netif_warn(priv, type, dev, fmt, args...) \
- netif_level(warn, priv, type, dev, fmt, ##args)
-#undef netif_notice
-#define netif_notice(priv, type, dev, fmt, args...) \
- netif_level(notice, priv, type, dev, fmt, ##args)
-#undef netif_info
-#define netif_info(priv, type, dev, fmt, args...) \
- netif_level(info, priv, type, dev, fmt, ##args)
-#undef netif_dbg
-#define netif_dbg(priv, type, dev, fmt, args...) \
- netif_level(dbg, priv, type, dev, fmt, ##args)
-
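-/*
- * Usage sketch (illustrative only): with the fallbacks above, driver code can
- * keep using the modern logging helpers unchanged on older kernels, e.g.
- *	netdev_warn(netdev, "link is down\n");
- *	netif_info(adapter, probe, netdev, "using %u queues\n", num_queues);
- * where adapter is a private struct whose msg_enable field feeds netif_msg_*().
- */
-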
-#ifdef SET_SYSTEM_SLEEP_PM_OPS
-#define HAVE_SYSTEM_SLEEP_PM_OPS
-#endif
-
-#ifndef for_each_set_bit
-#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-#endif /* for_each_set_bit */
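-
-/*
- * Illustrative sketch (not from the original header): for_each_set_bit()
- * visits every set bit of a bitmap, for example to count them:
- */
-static inline unsigned int _kc_example_count_set_bits(const unsigned long *map,
-						      unsigned int size)
-{
-	unsigned int bit, count = 0;
-
-	for_each_set_bit(bit, map, size)
-		count++;
-	return count;
-}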
-
-#ifndef DEFINE_DMA_UNMAP_ADDR
-#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
-#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
-#define dma_unmap_addr pci_unmap_addr
-#define dma_unmap_addr_set pci_unmap_addr_set
-#define dma_unmap_len pci_unmap_len
-#define dma_unmap_len_set pci_unmap_len_set
-#endif /* DEFINE_DMA_UNMAP_ADDR */
-
-#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3))
-#ifdef IGB_HWMON
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define sysfs_attr_init(attr) \
- do { \
- static struct lock_class_key __key; \
- (attr)->key = &__key; \
- } while (0)
-#else
-#define sysfs_attr_init(attr) do {} while (0)
-#endif /* CONFIG_DEBUG_LOCK_ALLOC */
-#endif /* IGB_HWMON */
-#endif /* RHEL_RELEASE_CODE */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-static inline bool _kc_pm_runtime_suspended(void)
-{
- return false;
-}
-#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended()
-#else /* 2.6.0 => 2.6.34 */
-static inline bool _kc_pm_runtime_suspended(struct device *dev)
-{
- return false;
-}
-#ifndef pm_runtime_suspended
-#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended(dev)
-#endif
-#endif /* 2.6.0 => 2.6.34 */
-
-#else /* < 2.6.34 */
-#define HAVE_SYSTEM_SLEEP_PM_OPS
-#ifndef HAVE_SET_RX_MODE
-#define HAVE_SET_RX_MODE
-#endif
-
-#endif /* < 2.6.34 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-
-ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
- const void __user *from, size_t count);
-#define simple_write_to_buffer _kc_simple_write_to_buffer
-
-#ifndef numa_node_id
-#define numa_node_id() 0
-#endif
-#ifdef HAVE_TX_MQ
-#include <net/sch_generic.h>
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
-void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
-#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues
-#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
-#else /* CONFIG_NETDEVICES_MULTIQUEUE */
-#define netif_set_real_num_tx_queues(_netdev, _count) \
- do { \
- (_netdev)->egress_subqueue_count = _count; \
- } while (0)
-#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
-#else /* HAVE_TX_MQ */
-#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
-#endif /* HAVE_TX_MQ */
-#ifndef ETH_FLAG_RXHASH
-#define ETH_FLAG_RXHASH (1<<28)
-#endif /* ETH_FLAG_RXHASH */
-#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))
-#define HAVE_IRQ_AFFINITY_HINT
-#endif
-#else /* < 2.6.35 */
-#define HAVE_PM_QOS_REQUEST_LIST
-#define HAVE_IRQ_AFFINITY_HINT
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
-#define ethtool_op_set_flags _kc_ethtool_op_set_flags
-extern u32 _kc_ethtool_op_get_flags(struct net_device *);
-#define ethtool_op_get_flags _kc_ethtool_op_get_flags
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#ifdef NET_IP_ALIGN
-#undef NET_IP_ALIGN
-#endif
-#define NET_IP_ALIGN 0
-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#ifdef NET_SKB_PAD
-#undef NET_SKB_PAD
-#endif
-
-#if (L1_CACHE_BYTES > 32)
-#define NET_SKB_PAD L1_CACHE_BYTES
-#else
-#define NET_SKB_PAD 32
-#endif
-
-static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
- unsigned int length)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
- if (skb) {
-#if (NET_IP_ALIGN + NET_SKB_PAD)
- skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
-#endif
- skb->dev = dev;
- }
- return skb;
-}
-
-#ifdef netdev_alloc_skb_ip_align
-#undef netdev_alloc_skb_ip_align
-#endif
-#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
-
-#undef netif_level
-#define netif_level(level, priv, type, dev, fmt, args...) \
-do { \
- if (netif_msg_##type(priv)) \
- netdev_##level(dev, fmt, ##args); \
-} while (0)
-
-#undef usleep_range
-#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
-
-#define u64_stats_update_begin(a) do { } while(0)
-#define u64_stats_update_end(a) do { } while(0)
-#define u64_stats_fetch_begin(a) do { } while(0)
-#define u64_stats_fetch_retry_bh(a) (0)
-#define u64_stats_fetch_begin_bh(a) (0)
-
-#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1))
-#define HAVE_8021P_SUPPORT
-#endif
-
-#else /* < 2.6.36 */
-
-
-#define HAVE_PM_QOS_REQUEST_ACTIVE
-#define HAVE_8021P_SUPPORT
-#define HAVE_NDO_GET_STATS64
-#endif /* < 2.6.36 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
-#ifndef netif_set_real_num_rx_queues
-static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev,
- unsigned int rxq)
-{
- return 0;
-}
-#define netif_set_real_num_rx_queues(dev, rxq) \
- __kc_netif_set_real_num_rx_queues((dev), (rxq))
-#endif
-#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
-#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
-#endif
-#ifndef VLAN_N_VID
-#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN
-#endif /* VLAN_N_VID */
-#ifndef ETH_FLAG_TXVLAN
-#define ETH_FLAG_TXVLAN (1 << 7)
-#endif /* ETH_FLAG_TXVLAN */
-#ifndef ETH_FLAG_RXVLAN
-#define ETH_FLAG_RXVLAN (1 << 8)
-#endif /* ETH_FLAG_RXVLAN */
-
-static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
-{
- WARN_ON(skb->ip_summed != CHECKSUM_NONE);
-}
-#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
-
-static inline void *_kc_vzalloc_node(unsigned long size, int node)
-{
- void *addr = vmalloc_node(size, node);
- if (addr)
- memset(addr, 0, size);
- return addr;
-}
-#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
-
-static inline void *_kc_vzalloc(unsigned long size)
-{
- void *addr = vmalloc(size);
- if (addr)
- memset(addr, 0, size);
- return addr;
-}
-#define vzalloc(_size) _kc_vzalloc(_size)
-
-#ifndef vlan_get_protocol
-static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
-{
- if (vlan_tx_tag_present(skb) ||
- skb->protocol != cpu_to_be16(ETH_P_8021Q))
- return skb->protocol;
-
- if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
- return 0;
-
- return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
-}
-#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
-#endif
-#ifdef HAVE_HW_TIME_STAMP
-#define SKBTX_HW_TSTAMP (1 << 0)
-#define SKBTX_IN_PROGRESS (1 << 2)
-#define SKB_SHARED_TX_IS_UNION
-#endif
-
-#ifndef device_wakeup_enable
-#define device_wakeup_enable(dev) device_set_wakeup_enable(dev, true)
-#endif
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
-#ifndef HAVE_VLAN_RX_REGISTER
-#define HAVE_VLAN_RX_REGISTER
-#endif
-#endif /* > 2.4.18 */
-#endif /* < 2.6.37 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
-#else /* 2.6.22 -> 2.6.37 */
-static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
-{
- return skb->csum_start - skb_headroom(skb);
-}
-#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
-#endif /* 2.6.22 -> 2.6.37 */
-#ifdef CONFIG_DCB
-#ifndef IEEE_8021QAZ_MAX_TCS
-#define IEEE_8021QAZ_MAX_TCS 8
-#endif
-#ifndef DCB_CAP_DCBX_HOST
-#define DCB_CAP_DCBX_HOST 0x01
-#endif
-#ifndef DCB_CAP_DCBX_LLD_MANAGED
-#define DCB_CAP_DCBX_LLD_MANAGED 0x02
-#endif
-#ifndef DCB_CAP_DCBX_VER_CEE
-#define DCB_CAP_DCBX_VER_CEE 0x04
-#endif
-#ifndef DCB_CAP_DCBX_VER_IEEE
-#define DCB_CAP_DCBX_VER_IEEE 0x08
-#endif
-#ifndef DCB_CAP_DCBX_STATIC
-#define DCB_CAP_DCBX_STATIC 0x10
-#endif
-#endif /* CONFIG_DCB */
-#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))
-#define CONFIG_XPS
-#endif /* RHEL_RELEASE_VERSION(6,2) */
-#endif /* < 2.6.38 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#ifndef NETIF_F_RXCSUM
-#define NETIF_F_RXCSUM (1 << 29)
-#endif
-#ifndef skb_queue_reverse_walk_safe
-#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
- for (skb = (queue)->prev, tmp = skb->prev; \
- skb != (struct sk_buff *)(queue); \
- skb = tmp, tmp = skb->prev)
-#endif
-#else /* < 2.6.39 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
-#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#ifndef HAVE_MQPRIO
-#define HAVE_MQPRIO
-#endif
-#ifndef HAVE_SETUP_TC
-#define HAVE_SETUP_TC
-#endif
-#ifdef CONFIG_DCB
-#ifndef HAVE_DCBNL_IEEE
-#define HAVE_DCBNL_IEEE
-#endif
-#endif /* CONFIG_DCB */
-#ifndef HAVE_NDO_SET_FEATURES
-#define HAVE_NDO_SET_FEATURES
-#endif
-#endif /* < 2.6.39 */
-
-/*****************************************************************************/
-/* use < 2.6.40 because of a Fedora 15 kernel update that bumped the
- * kernel version to 2.6.40.x and back-ported 3.0 features such as
- * set_phys_id for ethtool.
- */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
-#ifdef ETHTOOL_GRXRINGS
-#ifndef FLOW_EXT
-#define FLOW_EXT 0x80000000
-union _kc_ethtool_flow_union {
- struct ethtool_tcpip4_spec tcp_ip4_spec;
- struct ethtool_usrip4_spec usr_ip4_spec;
- __u8 hdata[60];
-};
-struct _kc_ethtool_flow_ext {
- __be16 vlan_etype;
- __be16 vlan_tci;
- __be32 data[2];
-};
-struct _kc_ethtool_rx_flow_spec {
- __u32 flow_type;
- union _kc_ethtool_flow_union h_u;
- struct _kc_ethtool_flow_ext h_ext;
- union _kc_ethtool_flow_union m_u;
- struct _kc_ethtool_flow_ext m_ext;
- __u64 ring_cookie;
- __u32 location;
-};
-#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
-#endif /* FLOW_EXT */
-#endif
-
-#define pci_disable_link_state_locked pci_disable_link_state
-
-#ifndef PCI_LTR_VALUE_MASK
-#define PCI_LTR_VALUE_MASK 0x000003ff
-#endif
-#ifndef PCI_LTR_SCALE_MASK
-#define PCI_LTR_SCALE_MASK 0x00001c00
-#endif
-#ifndef PCI_LTR_SCALE_SHIFT
-#define PCI_LTR_SCALE_SHIFT 10
-#endif
-
-#else /* < 2.6.40 */
-#define HAVE_ETHTOOL_SET_PHYS_ID
-#endif /* < 2.6.40 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) )
-#define USE_LEGACY_PM_SUPPORT
-#endif /* < 3.0.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
-#ifndef __netdev_alloc_skb_ip_align
-#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
-#endif /* __netdev_alloc_skb_ip_align */
-#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
-#define dcb_ieee_delapp(dev, app) 0
-#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
-
-/* 1000BASE-T Control register */
-#define CTL1000_AS_MASTER 0x0800
-#define CTL1000_ENABLE_MASTER 0x1000
-
-#else /* < 3.1.0 */
-#ifndef HAVE_DCBNL_IEEE_DELAPP
-#define HAVE_DCBNL_IEEE_DELAPP
-#endif
-#endif /* < 3.1.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
-#ifdef ETHTOOL_GRXRINGS
-#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
-#endif /* ETHTOOL_GRXRINGS */
-
-#ifndef skb_frag_size
-#define skb_frag_size(frag) _kc_skb_frag_size(frag)
-static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
-{
- return frag->size;
-}
-#endif /* skb_frag_size */
-
-#ifndef skb_frag_size_sub
-#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta)
-static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
-{
- frag->size -= delta;
-}
-#endif /* skb_frag_size_sub */
-
-#ifndef skb_frag_page
-#define skb_frag_page(frag) _kc_skb_frag_page(frag)
-static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
-{
- return frag->page;
-}
-#endif /* skb_frag_page */
-
-#ifndef skb_frag_address
-#define skb_frag_address(frag) _kc_skb_frag_address(frag)
-static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
-{
- return page_address(skb_frag_page(frag)) + frag->page_offset;
-}
-#endif /* skb_frag_address */
-
-#ifndef skb_frag_dma_map
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-#include <linux/dma-mapping.h>
-#endif
-#define skb_frag_dma_map(dev,frag,offset,size,dir) \
- _kc_skb_frag_dma_map(dev,frag,offset,size,dir)
-static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
- const skb_frag_t *frag,
- size_t offset, size_t size,
- enum dma_data_direction dir)
-{
- return dma_map_page(dev, skb_frag_page(frag),
- frag->page_offset + offset, size, dir);
-}
-#endif /* skb_frag_dma_map */
-
-#ifndef __skb_frag_unref
-#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
-static inline void __kc_skb_frag_unref(skb_frag_t *frag)
-{
- put_page(skb_frag_page(frag));
-}
-#endif /* __skb_frag_unref */
-
-#ifndef SPEED_UNKNOWN
-#define SPEED_UNKNOWN -1
-#endif
-#ifndef DUPLEX_UNKNOWN
-#define DUPLEX_UNKNOWN 0xff
-#endif
-#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3))
-#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
-#define HAVE_PCI_DEV_FLAGS_ASSIGNED
-#endif
-#endif
-#else /* < 3.2.0 */
-#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
-#define HAVE_PCI_DEV_FLAGS_ASSIGNED
-#define HAVE_VF_SPOOFCHK_CONFIGURE
-#endif
-#endif /* < 3.2.0 */
-
-#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2))
-#undef ixgbe_get_netdev_tc_txq
-#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
-#endif
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
-typedef u32 kni_netdev_features_t;
-#undef PCI_EXP_TYPE_RC_EC
-#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
-#ifndef CONFIG_BQL
-#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0)
-#define netdev_completed_queue(_n, _p, _b) do {} while (0)
-#define netdev_tx_sent_queue(_q, _b) do {} while (0)
-#define netdev_sent_queue(_n, _b) do {} while (0)
-#define netdev_tx_reset_queue(_q) do {} while (0)
-#define netdev_reset_queue(_n) do {} while (0)
-#endif
-#else /* ! < 3.3.0 */
-typedef netdev_features_t kni_netdev_features_t;
-#define HAVE_INT_NDO_VLAN_RX_ADD_VID
-#ifdef ETHTOOL_SRXNTUPLE
-#undef ETHTOOL_SRXNTUPLE
-#endif
-#endif /* < 3.3.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-#ifndef NETIF_F_RXFCS
-#define NETIF_F_RXFCS 0
-#endif /* NETIF_F_RXFCS */
-#ifndef NETIF_F_RXALL
-#define NETIF_F_RXALL 0
-#endif /* NETIF_F_RXALL */
-
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
-#define NUMTCS_RETURNS_U8
-
-int _kc_simple_open(struct inode *inode, struct file *file);
-#define simple_open _kc_simple_open
-#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
-
-
-#ifndef skb_add_rx_frag
-#define skb_add_rx_frag _kc_skb_add_rx_frag
-extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *,
- int, int, unsigned int);
-#endif
-#ifdef NET_ADDR_RANDOM
-#define eth_hw_addr_random(N) do { \
- random_ether_addr(N->dev_addr); \
- N->addr_assign_type |= NET_ADDR_RANDOM; \
- } while (0)
-#else /* NET_ADDR_RANDOM */
-#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr)
-#endif /* NET_ADDR_RANDOM */
-#else /* < 3.4.0 */
-#include <linux/kconfig.h>
-#endif /* >= 3.4.0 */
-
-/*****************************************************************************/
-#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP)
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-#define HAVE_PTP_1588_CLOCK
-#else
-#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel
-#endif /* >= 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
-#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
-#define skb_tx_timestamp(skb) do {} while (0)
-static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
-{
- return !compare_ether_addr(addr1, addr2);
-}
-#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
-#else
-#define HAVE_FDB_OPS
-#define HAVE_ETHTOOL_GET_TS_INFO
-#endif /* < 3.5.0 */
-
-/*****************************************************************************/
-#include <linux/mdio.h>
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) )
-#define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */
-
-#ifndef MDIO_EEE_100TX
-#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */
-#endif
-#ifndef MDIO_EEE_1000T
-#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */
-#endif
-#ifndef MDIO_EEE_10GT
-#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */
-#endif
-#ifndef MDIO_EEE_1000KX
-#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */
-#endif
-#ifndef MDIO_EEE_10GKX4
-#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */
-#endif
-#ifndef MDIO_EEE_10GKR
-#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */
-#endif
-#endif /* < 3.6.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
-#ifndef ADVERTISED_40000baseKR4_Full
-/* these defines were all added in one commit, so it should be safe
- * to trigger activation on one define
- */
-#define SUPPORTED_40000baseKR4_Full (1 << 23)
-#define SUPPORTED_40000baseCR4_Full (1 << 24)
-#define SUPPORTED_40000baseSR4_Full (1 << 25)
-#define SUPPORTED_40000baseLR4_Full (1 << 26)
-#define ADVERTISED_40000baseKR4_Full (1 << 23)
-#define ADVERTISED_40000baseCR4_Full (1 << 24)
-#define ADVERTISED_40000baseSR4_Full (1 << 25)
-#define ADVERTISED_40000baseLR4_Full (1 << 26)
-#endif
-
-/**
- * mmd_eee_cap_to_ethtool_sup_t
- * @eee_cap: value of the MMD EEE Capability register
- *
- * A small helper function that translates MMD EEE Capability (3.20) bits
- * to ethtool supported settings.
- */
-static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap)
-{
- u32 supported = 0;
-
- if (eee_cap & MDIO_EEE_100TX)
- supported |= SUPPORTED_100baseT_Full;
- if (eee_cap & MDIO_EEE_1000T)
- supported |= SUPPORTED_1000baseT_Full;
- if (eee_cap & MDIO_EEE_10GT)
- supported |= SUPPORTED_10000baseT_Full;
- if (eee_cap & MDIO_EEE_1000KX)
- supported |= SUPPORTED_1000baseKX_Full;
- if (eee_cap & MDIO_EEE_10GKX4)
- supported |= SUPPORTED_10000baseKX4_Full;
- if (eee_cap & MDIO_EEE_10GKR)
- supported |= SUPPORTED_10000baseKR_Full;
-
- return supported;
-}
-#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \
- __kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap)
-
-/**
- * mmd_eee_adv_to_ethtool_adv_t
- * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers
- *
- * A small helper function that translates the MMD EEE Advertisement (7.60)
- * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement
- * settings.
- */
-static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv)
-{
- u32 adv = 0;
-
- if (eee_adv & MDIO_EEE_100TX)
- adv |= ADVERTISED_100baseT_Full;
- if (eee_adv & MDIO_EEE_1000T)
- adv |= ADVERTISED_1000baseT_Full;
- if (eee_adv & MDIO_EEE_10GT)
- adv |= ADVERTISED_10000baseT_Full;
- if (eee_adv & MDIO_EEE_1000KX)
- adv |= ADVERTISED_1000baseKX_Full;
- if (eee_adv & MDIO_EEE_10GKX4)
- adv |= ADVERTISED_10000baseKX4_Full;
- if (eee_adv & MDIO_EEE_10GKR)
- adv |= ADVERTISED_10000baseKR_Full;
-
- return adv;
-}
-#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \
- __kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv)
-
-/**
- * ethtool_adv_to_mmd_eee_adv_t
- * @adv: the ethtool advertisement settings
- *
- * A small helper function that translates ethtool advertisement settings
- * to EEE advertisements for the MMD EEE Advertisement (7.60) and
- * MMD EEE Link Partner Ability (7.61) registers.
- */
-static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv)
-{
- u16 reg = 0;
-
- if (adv & ADVERTISED_100baseT_Full)
- reg |= MDIO_EEE_100TX;
- if (adv & ADVERTISED_1000baseT_Full)
- reg |= MDIO_EEE_1000T;
- if (adv & ADVERTISED_10000baseT_Full)
- reg |= MDIO_EEE_10GT;
- if (adv & ADVERTISED_1000baseKX_Full)
- reg |= MDIO_EEE_1000KX;
- if (adv & ADVERTISED_10000baseKX4_Full)
- reg |= MDIO_EEE_10GKX4;
- if (adv & ADVERTISED_10000baseKR_Full)
- reg |= MDIO_EEE_10GKR;
-
- return reg;
-}
-#define ethtool_adv_to_mmd_eee_adv_t(adv) \
- __kc_ethtool_adv_to_mmd_eee_adv_t(adv)
-
-#ifndef pci_pcie_type
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-static inline u8 pci_pcie_type(struct pci_dev *pdev)
-{
- int pos;
- u16 reg16;
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (!pos)
- BUG();
- pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
- return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
-}
-#else /* < 2.6.24 */
-#define pci_pcie_type(x) (x)->pcie_type
-#endif /* < 2.6.24 */
-#endif /* pci_pcie_type */
-
-#define ptp_clock_register(caps, args...) ptp_clock_register(caps)
-
-#ifndef PCI_EXP_LNKSTA2
-int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
-#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v)
-int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val);
-#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v)
-int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
- u16 clear, u16 set);
-#define pcie_capability_clear_and_set_word(d,p,c,s) \
- __kc_pcie_capability_clear_and_set_word(d,p,c,s)
-
-#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
-
-static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos,
- u16 clear)
-{
- return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0);
-}
-#endif /* !PCI_EXP_LNKSTA2 */
-
-#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
-#define USE_CONST_DEV_UC_CHAR
-#endif
-
-#else /* >= 3.7.0 */
-#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS
-#define USE_CONST_DEV_UC_CHAR
-#endif /* >= 3.7.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) )
-#ifndef PCI_EXP_LNKCTL_ASPM_L0S
-#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */
-#endif
-#ifndef PCI_EXP_LNKCTL_ASPM_L1
-#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */
-#endif
-#define HAVE_CONFIG_HOTPLUG
-/* Reserved Ethernet Addresses per IEEE 802.1Q */
-static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = {
- 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\
- !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
-static inline bool is_link_local_ether_addr(const u8 *addr)
-{
- __be16 *a = (__be16 *)addr;
- static const __be16 *b = (const __be16 *)eth_reserved_addr_base;
- static const __be16 m = cpu_to_be16(0xfff0);
-
- return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
-}
-#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
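The backport above compares the address against the IEEE 802.1Q reserved base 01:80:C2:00:00:00 while masking the low 4 bits of the final 16-bit word with 0xfff0, so it accepts the sixteen link-local addresses 01:80:C2:00:00:00 through 01:80:C2:00:00:0F (for example the STP and LLDP group addresses) and rejects everything else.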
-#else /* >= 3.8.0 */
-#ifndef __devinit
-#define __devinit
-#define HAVE_ENCAP_CSUM_OFFLOAD
-#endif
-
-#ifndef __devinitdata
-#define __devinitdata
-#endif
-
-#ifndef __devexit
-#define __devexit
-#endif
-
-#ifndef __devexit_p
-#define __devexit_p
-#endif
-
-#ifndef HAVE_SRIOV_CONFIGURE
-#define HAVE_SRIOV_CONFIGURE
-#endif
-
-#define HAVE_BRIDGE_ATTRIBS
-#ifndef BRIDGE_MODE_VEB
-#define BRIDGE_MODE_VEB 0 /* Default loopback mode */
-#endif /* BRIDGE_MODE_VEB */
-#ifndef BRIDGE_MODE_VEPA
-#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */
-#endif /* BRIDGE_MODE_VEPA */
-#endif /* >= 3.8.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
-
-#undef hlist_entry
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#undef hlist_entry_safe
-#define hlist_entry_safe(ptr, type, member) \
- (ptr) ? hlist_entry(ptr, type, member) : NULL
-
-#undef hlist_for_each_entry
-#define hlist_for_each_entry(pos, head, member) \
- for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
- pos; \
- pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
-
-#undef hlist_for_each_entry_safe
-#define hlist_for_each_entry_safe(pos, n, head, member) \
- for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \
- pos && ({ n = pos->member.next; 1; }); \
- pos = hlist_entry_safe(n, typeof(*pos), member))
-
-#ifdef CONFIG_XPS
-extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16);
-#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx))
-#else /* CONFIG_XPS */
-#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0)
-#endif /* CONFIG_XPS */
-
-#ifdef HAVE_NETDEV_SELECT_QUEUE
-#define _kc_hashrnd 0xd631614b /* not so random hash salt */
-extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
-#define __netdev_pick_tx __kc_netdev_pick_tx
-#endif /* HAVE_NETDEV_SELECT_QUEUE */
-#else
-#define HAVE_BRIDGE_FILTER
-#define USE_DEFAULT_FDB_DEL_DUMP
-#endif /* < 3.9.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
-#else
-static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
- return 0;
-}
-#endif
-#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
-
-#ifndef VLAN_TX_COOKIE_MAGIC
-static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb,
- u16 vlan_tci)
-{
-#ifdef VLAN_TAG_PRESENT
- vlan_tci |= VLAN_TAG_PRESENT;
-#endif
- skb->vlan_tci = vlan_tci;
- return skb;
-}
-#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \
- __kc__vlan_hwaccel_put_tag(skb, vlan_tci)
-#endif
-
-#else /* >= 3.10.0 */
-#define HAVE_ENCAP_TSO_OFFLOAD
-#endif /* >= 3.10.0 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) )
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6)))
-#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \
- && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \
- || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4))))
-#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0)))
-#ifdef NETIF_F_RXHASH
-#define PKT_HASH_TYPE_L3 0
-static inline void
-skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
-{
- skb->rxhash = hash;
-}
-#endif /* NETIF_F_RXHASH */
-#endif /* < SLES12 */
-#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */
-#endif /* < RHEL7 */
-#endif /* < 3.14.0 */
-
-#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \
- || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
-#undef SET_ETHTOOL_OPS
-#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
-#define HAVE_VF_MIN_MAX_TXRATE 1
-#endif /* >= 3.16.0 */
-
-#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
- || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
-#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
-#define HAVE_NDO_FDB_ADD_VID
-#endif /* !RHEL 7.2 */
-#endif /* >= 3.19.0 */
-
-#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \
- || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
-/* vlan_tx_xx functions got renamed to skb_vlan */
-#define vlan_tx_tag_get skb_vlan_tag_get
-#define vlan_tx_tag_present skb_vlan_tag_present
-#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
-#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
-#endif /* !RHEL 7.2 */
-#endif /* 4.0.0 */
-
-#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \
- || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) ))
-/* ndo_bridge_getlink adds new nlflags parameter */
-#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
-#endif /* >= 4.1.0 */
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) )
-/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */
-#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
-#endif /* >= 4.2.0 */
-
-/*
- * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4).
- * Older kernels that back-ported this commit need to use the renamed functions.
- * This fix is specific to RedHat/CentOS kernels.
- */
-#if (defined(RHEL_RELEASE_CODE) && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
-#define vlan_tx_tag_get skb_vlan_tag_get
-#define vlan_tx_tag_present skb_vlan_tag_present
-#endif
-
-#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \
- (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0)))
-#define HAVE_VF_VLAN_PROTO
-#endif /* >= 4.9.0, >= SLES12SP3 */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
-#define HAVE_PCI_ENABLE_MSIX
-#endif
-
-#if defined(timer_setup) && defined(from_timer)
-#define HAVE_TIMER_SETUP
-#endif
-
-#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
deleted file mode 100644
index 6ff94133..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
+++ /dev/null
@@ -1,910 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_H_
-#define _IXGBE_H_
-
-#ifndef IXGBE_NO_LRO
-#include <net/tcp.h>
-#endif
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#ifdef HAVE_IRQ_AFFINITY_HINT
-#include <linux/cpumask.h>
-#endif /* HAVE_IRQ_AFFINITY_HINT */
-#include <linux/vmalloc.h>
-
-#ifdef SIOCETHTOOL
-#include <linux/ethtool.h>
-#endif
-#ifdef NETIF_F_HW_VLAN_TX
-#include <linux/if_vlan.h>
-#endif
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
-#define IXGBE_DCA
-#include <linux/dca.h>
-#endif
-#include "ixgbe_dcb.h"
-
-#include "kcompat.h"
-
-#ifdef HAVE_SCTP
-#include <linux/sctp.h>
-#endif
-
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#define IXGBE_FCOE
-#include "ixgbe_fcoe.h"
-#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
-
-#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE)
-#define HAVE_IXGBE_PTP
-#endif
-
-#include "ixgbe_api.h"
-
-#define PFX "ixgbe: "
-#define DPRINTK(nlevel, klevel, fmt, args...) \
- ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
- printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
- __func__ , ## args)))
-
-/* TX/RX descriptor defines */
-#define IXGBE_DEFAULT_TXD 512
-#define IXGBE_DEFAULT_TX_WORK 256
-#define IXGBE_MAX_TXD 4096
-#define IXGBE_MIN_TXD 64
-
-#define IXGBE_DEFAULT_RXD 512
-#define IXGBE_DEFAULT_RX_WORK 256
-#define IXGBE_MAX_RXD 4096
-#define IXGBE_MIN_RXD 64
-
-
-/* flow control */
-#define IXGBE_MIN_FCRTL 0x40
-#define IXGBE_MAX_FCRTL 0x7FF80
-#define IXGBE_MIN_FCRTH 0x600
-#define IXGBE_MAX_FCRTH 0x7FFF0
-#define IXGBE_DEFAULT_FCPAUSE 0xFFFF
-#define IXGBE_MIN_FCPAUSE 0
-#define IXGBE_MAX_FCPAUSE 0xFFFF
-
-/* Supported Rx Buffer Sizes */
-#define IXGBE_RXBUFFER_512 512 /* Used for packet split */
-#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
-#define IXGBE_RXBUFFER_1536 1536
-#define IXGBE_RXBUFFER_2K 2048
-#define IXGBE_RXBUFFER_3K 3072
-#define IXGBE_RXBUFFER_4K 4096
-#define IXGBE_RXBUFFER_7K 7168
-#define IXGBE_RXBUFFER_8K 8192
-#define IXGBE_RXBUFFER_15K 15360
-#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */
-#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */
-
-/*
- * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
- * reserve 2 more, and skb_shared_info adds an additional 384 bytes more;
- * this adds up to 512 bytes of extra data, meaning the smallest allocation
- * we could have is 1K.
- * i.e. RXBUFFER_512 --> size-1024 slab
- */
-#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512
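Working the note above through: a 512-byte header buffer plus up to 64 bytes of netdev_alloc_skb reserve, 2 bytes of NET_IP_ALIGN and 384 bytes of skb_shared_info comes to roughly 962 bytes, which the slab allocator rounds up to its 1024-byte cache, hence RXBUFFER_512 mapping to a size-1024 slab object.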
-
-#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
-
-/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */
-
-#define IXGBE_TX_FLAGS_CSUM (u32)(1)
-#define IXGBE_TX_FLAGS_HW_VLAN (u32)(1 << 1)
-#define IXGBE_TX_FLAGS_SW_VLAN (u32)(1 << 2)
-#define IXGBE_TX_FLAGS_TSO (u32)(1 << 3)
-#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 4)
-#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5)
-#define IXGBE_TX_FLAGS_FSO (u32)(1 << 6)
-#define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7)
-#define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8)
-#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000
-#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000
-#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29
-#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
-
-#define IXGBE_MAX_RX_DESC_POLL 10
-
-#define IXGBE_MAX_VF_MC_ENTRIES 30
-#define IXGBE_MAX_VF_FUNCTIONS 64
-#define IXGBE_MAX_VFTA_ENTRIES 128
-#define MAX_EMULATION_MAC_ADDRS 16
-#define IXGBE_MAX_PF_MACVLANS 15
-#define IXGBE_82599_VF_DEVICE_ID 0x10ED
-#define IXGBE_X540_VF_DEVICE_ID 0x1515
-
-#ifdef CONFIG_PCI_IOV
-#define VMDQ_P(p) ((p) + adapter->num_vfs)
-#else
-#define VMDQ_P(p) (p)
-#endif
-
-#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \
- { \
- u32 current_counter = IXGBE_READ_REG(hw, reg); \
- if (current_counter < last_counter) \
- counter += 0x100000000LL; \
- last_counter = current_counter; \
- counter &= 0xFFFFFFFF00000000LL; \
- counter |= current_counter; \
- }
-
-#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
- { \
- u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \
- u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \
- u64 current_counter = (current_counter_msb << 32) | \
- current_counter_lsb; \
- if (current_counter < last_counter) \
- counter += 0x1000000000LL; \
- last_counter = current_counter; \
- counter &= 0xFFFFFFF000000000LL; \
- counter |= current_counter; \
- }
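The two macros above widen free-running 32-bit and 36-bit hardware counters into monotonic 64-bit software totals by detecting wrap-around. A minimal sketch of the same technique as a plain helper (hypothetical function, shown only to illustrate the macro logic, not part of the driver):

static inline void accumulate_counter32(u32 cur, u32 *last, u64 *total)
{
	/* A reading below the previous one means the 32-bit counter wrapped,
	 * so credit one full wrap before splicing the new low bits back in. */
	if (cur < *last)
		*total += 0x100000000ULL;
	*last = cur;
	*total = (*total & 0xFFFFFFFF00000000ULL) | cur;
}

The 36-bit variant works the same way, except the wrap quantum is 2^36 and the preserved high part is masked accordingly.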
-
-struct vf_stats {
- u64 gprc;
- u64 gorc;
- u64 gptc;
- u64 gotc;
- u64 mprc;
-};
-
-struct vf_data_storage {
- unsigned char vf_mac_addresses[ETH_ALEN];
- u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
- u16 num_vf_mc_hashes;
- u16 default_vf_vlan_id;
- u16 vlans_enabled;
- bool clear_to_send;
- struct vf_stats vfstats;
- struct vf_stats last_vfstats;
- struct vf_stats saved_rst_vfstats;
- bool pf_set_mac;
- u16 pf_vlan; /* When set, guest VLAN config not allowed. */
- u16 pf_qos;
- u16 tx_rate;
- u16 vlan_count;
- u8 spoofchk_enabled;
- struct pci_dev *vfdev;
-};
-
-struct vf_macvlans {
- struct list_head l;
- int vf;
- bool free;
- bool is_macvlan;
- u8 vf_macvlan[ETH_ALEN];
-};
-
-#ifndef IXGBE_NO_LRO
-#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/
-#define IXGBE_LRO_GLOBAL 10
-
-struct ixgbe_lro_stats {
- u32 flushed;
- u32 coal;
-};
-
-/*
- * ixgbe_lro_header - header format to be aggregated by LRO
- * @iph: IP header without options
- * @tcp: TCP header
- * @ts: Optional TCP timestamp data in TCP options
- *
- * This structure relies on the check above that verifies that the header
- * is IPv4 and does not contain any options.
- */
-struct ixgbe_lrohdr {
- struct iphdr iph;
- struct tcphdr th;
- __be32 ts[0];
-};
-
-struct ixgbe_lro_list {
- struct sk_buff_head active;
- struct ixgbe_lro_stats stats;
-};
-
-#endif /* IXGBE_NO_LRO */
-#define IXGBE_MAX_TXD_PWR 14
-#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR)
-
-/* Tx Descriptors needed, worst case */
-#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD)
-#ifdef MAX_SKB_FRAGS
-#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
-#else
-#define DESC_NEEDED 4
-#endif
-
-/* wrapper around a pointer to a socket buffer,
- * so a DMA handle can be stored along with the buffer */
-struct ixgbe_tx_buffer {
- union ixgbe_adv_tx_desc *next_to_watch;
- unsigned long time_stamp;
- struct sk_buff *skb;
- unsigned int bytecount;
- unsigned short gso_segs;
- __be16 protocol;
- DEFINE_DMA_UNMAP_ADDR(dma);
- DEFINE_DMA_UNMAP_LEN(len);
- u32 tx_flags;
-};
-
-struct ixgbe_rx_buffer {
- struct sk_buff *skb;
- dma_addr_t dma;
-#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
- struct page *page;
- unsigned int page_offset;
-#endif
-};
-
-struct ixgbe_queue_stats {
- u64 packets;
- u64 bytes;
-};
-
-struct ixgbe_tx_queue_stats {
- u64 restart_queue;
- u64 tx_busy;
- u64 tx_done_old;
-};
-
-struct ixgbe_rx_queue_stats {
- u64 rsc_count;
- u64 rsc_flush;
- u64 non_eop_descs;
- u64 alloc_rx_page_failed;
- u64 alloc_rx_buff_failed;
- u64 csum_err;
-};
-
-enum ixgbe_ring_state_t {
- __IXGBE_TX_FDIR_INIT_DONE,
- __IXGBE_TX_DETECT_HANG,
- __IXGBE_HANG_CHECK_ARMED,
- __IXGBE_RX_RSC_ENABLED,
-#ifndef HAVE_NDO_SET_FEATURES
- __IXGBE_RX_CSUM_ENABLED,
-#endif
- __IXGBE_RX_CSUM_UDP_ZERO_ERR,
-#ifdef IXGBE_FCOE
- __IXGBE_RX_FCOE_BUFSZ,
-#endif
-};
-
-#define check_for_tx_hang(ring) \
- test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
- set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
- clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
-#ifndef IXGBE_NO_HW_RSC
-#define ring_is_rsc_enabled(ring) \
- test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
-#else
-#define ring_is_rsc_enabled(ring) false
-#endif
-#define set_ring_rsc_enabled(ring) \
- set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
-#define clear_ring_rsc_enabled(ring) \
- clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
-#define netdev_ring(ring) (ring->netdev)
-#define ring_queue_index(ring) (ring->queue_index)
-
-
-struct ixgbe_ring {
- struct ixgbe_ring *next; /* pointer to next ring in q_vector */
- struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
- struct net_device *netdev; /* netdev ring belongs to */
- struct device *dev; /* device for DMA mapping */
- void *desc; /* descriptor ring memory */
- union {
- struct ixgbe_tx_buffer *tx_buffer_info;
- struct ixgbe_rx_buffer *rx_buffer_info;
- };
- unsigned long state;
- u8 __iomem *tail;
- dma_addr_t dma; /* phys. address of descriptor ring */
- unsigned int size; /* length in bytes */
-
- u16 count; /* amount of descriptors */
-
- u8 queue_index; /* needed for multiqueue queue management */
- u8 reg_idx; /* holds the special value that gets
- * the hardware register offset
- * associated with this ring, which is
- * different for DCB and RSS modes
- */
- u16 next_to_use;
- u16 next_to_clean;
-
- union {
-#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
- u16 rx_buf_len;
-#else
- u16 next_to_alloc;
-#endif
- struct {
- u8 atr_sample_rate;
- u8 atr_count;
- };
- };
-
- u8 dcb_tc;
- struct ixgbe_queue_stats stats;
- union {
- struct ixgbe_tx_queue_stats tx_stats;
- struct ixgbe_rx_queue_stats rx_stats;
- };
-} ____cacheline_internodealigned_in_smp;
-
-enum ixgbe_ring_f_enum {
- RING_F_NONE = 0,
- RING_F_VMDQ, /* SR-IOV uses the same ring feature */
- RING_F_RSS,
- RING_F_FDIR,
-#ifdef IXGBE_FCOE
- RING_F_FCOE,
-#endif /* IXGBE_FCOE */
- RING_F_ARRAY_SIZE /* must be last in enum set */
-};
-
-#define IXGBE_MAX_DCB_INDICES 8
-#define IXGBE_MAX_RSS_INDICES 16
-#define IXGBE_MAX_VMDQ_INDICES 64
-#define IXGBE_MAX_FDIR_INDICES 64
-#ifdef IXGBE_FCOE
-#define IXGBE_MAX_FCOE_INDICES 8
-#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
-#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
-#else
-#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES
-#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES
-#endif /* IXGBE_FCOE */
-struct ixgbe_ring_feature {
- int indices;
- int mask;
-};
-
-#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
-/*
- * FCoE requires that all Rx buffers be over 2200 bytes in length. Since
- * this exceeds the default half-page buffer size (2048 bytes with 4K pages),
- * we need to double the page order for FCoE enabled Rx queues.
- */
-#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192)
-static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
-{
- return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0;
-}
-#else
-#define ixgbe_rx_pg_order(_ring) 0
-#endif
-#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
-#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring))
-
-#endif
-struct ixgbe_ring_container {
- struct ixgbe_ring *ring; /* pointer to linked list of rings */
- unsigned int total_bytes; /* total bytes processed this int */
- unsigned int total_packets; /* total packets processed this int */
- u16 work_limit; /* total work allowed per interrupt */
- u8 count; /* total number of rings in vector */
- u8 itr; /* current ITR setting for ring */
-};
-
-/* iterator for handling rings in ring container */
-#define ixgbe_for_each_ring(pos, head) \
- for (pos = (head).ring; pos != NULL; pos = pos->next)
-
-#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
- ? 8 : 1)
-#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
-
-/* MAX_MSIX_Q_VECTORS of these are allocated,
- * but we only use one per queue-specific vector.
- */
-struct ixgbe_q_vector {
- struct ixgbe_adapter *adapter;
- int cpu; /* CPU for DCA */
- u16 v_idx; /* index of q_vector within array, also used for
- * finding the bit in EICR and friends that
- * represents the vector for this ring */
- u16 itr; /* Interrupt throttle rate written to EITR */
- struct ixgbe_ring_container rx, tx;
-
-#ifdef CONFIG_IXGBE_NAPI
- struct napi_struct napi;
-#endif
-#ifndef HAVE_NETDEV_NAPI_LIST
- struct net_device poll_dev;
-#endif
-#ifdef HAVE_IRQ_AFFINITY_HINT
- cpumask_t affinity_mask;
-#endif
-#ifndef IXGBE_NO_LRO
- struct ixgbe_lro_list lrolist; /* LRO list for queue vector*/
-#endif
- int numa_node;
- char name[IFNAMSIZ + 9];
-
- /* for dynamic allocation of rings associated with this q_vector */
- struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
-};
-
-/*
- * Microsecond values for various ITR rates, shifted by 2 to fit the ITR
- * register, whose first 3 bits are reserved as 0.
- */
-#define IXGBE_MIN_RSC_ITR 24
-#define IXGBE_100K_ITR 40
-#define IXGBE_20K_ITR 200
-#define IXGBE_16K_ITR 248
-#define IXGBE_10K_ITR 400
-#define IXGBE_8K_ITR 500
-
-/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */
-static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc,
- const u32 stat_err_bits)
-{
- return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
-}
-
-/* ixgbe_desc_unused - calculate the number of unused descriptors */
-static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
-{
- u16 ntc = ring->next_to_clean;
- u16 ntu = ring->next_to_use;
-
- return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
-}
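For example, with count = 512, next_to_clean = 10 and next_to_use = 500 the ring has wrapped, giving 512 + 10 - 500 - 1 = 21 free descriptors; with next_to_clean = 500 and next_to_use = 10 it gives 500 - 10 - 1 = 489. The trailing -1 keeps one slot permanently unused so that next_to_use == next_to_clean can only ever mean an empty ring.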
-
-#define IXGBE_RX_DESC(R, i) \
- (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
-#define IXGBE_TX_DESC(R, i) \
- (&(((union ixgbe_adv_tx_desc *)((R)->desc))[i]))
-#define IXGBE_TX_CTXTDESC(R, i) \
- (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i]))
-
-#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128
-#ifdef IXGBE_FCOE
-/* use 3K as the baby jumbo frame size for FCoE */
-#define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072
-#endif /* IXGBE_FCOE */
-
-#define TCP_TIMER_VECTOR 0
-#define OTHER_VECTOR 1
-#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR)
-
-#define IXGBE_MAX_MSIX_Q_VECTORS_82599 64
-#define IXGBE_MAX_MSIX_Q_VECTORS_82598 16
-
-struct ixgbe_mac_addr {
- u8 addr[ETH_ALEN];
- u16 queue;
- u16 state; /* bitmask */
-};
-#define IXGBE_MAC_STATE_DEFAULT 0x1
-#define IXGBE_MAC_STATE_MODIFIED 0x2
-#define IXGBE_MAC_STATE_IN_USE 0x4
-
-#ifdef IXGBE_PROCFS
-struct ixgbe_therm_proc_data {
- struct ixgbe_hw *hw;
- struct ixgbe_thermal_diode_data *sensor_data;
-};
-
-#endif /* IXGBE_PROCFS */
-
-/*
- * Only for array allocations in our adapter struct. On 82598, there will be
- * unused entries in the array, but that's not a big deal. Also, in 82599,
- * we can actually assign 64 queue vectors based on our extended-extended
- * interrupt registers. This is different than 82598, which is limited to 16.
- */
-#define MAX_MSIX_Q_VECTORS IXGBE_MAX_MSIX_Q_VECTORS_82599
-#define MAX_MSIX_COUNT IXGBE_MAX_MSIX_VECTORS_82599
-
-#define MIN_MSIX_Q_VECTORS 1
-#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
-
-/* default to trying for four seconds */
-#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
-
-/* board specific private data structure */
-struct ixgbe_adapter {
-#ifdef NETIF_F_HW_VLAN_TX
-#ifdef HAVE_VLAN_RX_REGISTER
- struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */
-#else
- unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-#endif
-#endif /* NETIF_F_HW_VLAN_TX */
- /* OS defined structs */
- struct net_device *netdev;
- struct pci_dev *pdev;
-
- unsigned long state;
-
- /* Some features need tri-state capability,
- * thus the additional *_CAPABLE flags.
- */
- u32 flags;
-#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0)
-#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1)
-#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2)
-#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3)
-#ifndef IXGBE_NO_LLI
-#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 4)
-#endif
-#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 8)
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
-#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 9)
-#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 10)
-#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 11)
-#else
-#define IXGBE_FLAG_DCA_ENABLED (u32)0
-#define IXGBE_FLAG_DCA_CAPABLE (u32)0
-#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)0
-#endif
-#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 12)
-#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 13)
-#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 14)
-#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 15)
-#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 16)
-#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 18)
-#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 19)
-#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 20)
-#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 21)
-#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 22)
-#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 23)
-#ifdef IXGBE_FCOE
-#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 24)
-#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 25)
-#endif /* IXGBE_FCOE */
-#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 26)
-#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 27)
-#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 28)
-#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 29)
-#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE (u32)(1 << 30)
-#define IXGBE_FLAG_RX_BB_CAPABLE (u32)(1 << 31)
-
- u32 flags2;
-#ifndef IXGBE_NO_HW_RSC
-#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1)
-#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1)
-#else
-#define IXGBE_FLAG2_RSC_CAPABLE 0
-#define IXGBE_FLAG2_RSC_ENABLED 0
-#endif
-#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 2)
-#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 4)
-#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 5)
-#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 6)
-#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 7)
-#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 8)
-#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 9)
-#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 10)
-#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 11)
-#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 12)
-
- /* Tx fast path data */
- int num_tx_queues;
- u16 tx_itr_setting;
- u16 tx_work_limit;
-
- /* Rx fast path data */
- int num_rx_queues;
- u16 rx_itr_setting;
- u16 rx_work_limit;
-
- /* TX */
- struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
-
- u64 restart_queue;
- u64 lsc_int;
- u32 tx_timeout_count;
-
- /* RX */
- struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
- int num_rx_pools; /* == num_rx_queues in 82598 */
- int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */
- u64 hw_csum_rx_error;
- u64 hw_rx_no_dma_resources;
- u64 rsc_total_count;
- u64 rsc_total_flush;
- u64 non_eop_descs;
-#ifndef CONFIG_IXGBE_NAPI
- u64 rx_dropped_backlog; /* count drops from rx intr handler */
-#endif
- u32 alloc_rx_page_failed;
- u32 alloc_rx_buff_failed;
-
- struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
-
-#ifdef HAVE_DCBNL_IEEE
- struct ieee_pfc *ixgbe_ieee_pfc;
- struct ieee_ets *ixgbe_ieee_ets;
-#endif
- struct ixgbe_dcb_config dcb_cfg;
- struct ixgbe_dcb_config temp_dcb_cfg;
- u8 dcb_set_bitmap;
- u8 dcbx_cap;
-#ifndef HAVE_MQPRIO
- u8 tc;
-#endif
- enum ixgbe_fc_mode last_lfc_mode;
-
- int num_msix_vectors;
- int max_msix_q_vectors; /* true count of q_vectors for device */
- struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
- struct msix_entry *msix_entries;
-
-#ifndef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats net_stats;
-#endif
-#ifndef IXGBE_NO_LRO
- struct ixgbe_lro_stats lro_stats;
-#endif
-
-#ifdef ETHTOOL_TEST
- u32 test_icr;
- struct ixgbe_ring test_tx_ring;
- struct ixgbe_ring test_rx_ring;
-#endif
-
- /* structs defined in ixgbe_hw.h */
- struct ixgbe_hw hw;
- u16 msg_enable;
- struct ixgbe_hw_stats stats;
-#ifndef IXGBE_NO_LLI
- u32 lli_port;
- u32 lli_size;
- u32 lli_etype;
- u32 lli_vlan_pri;
-#endif /* IXGBE_NO_LLI */
-
- u32 *config_space;
- u64 tx_busy;
- unsigned int tx_ring_count;
- unsigned int rx_ring_count;
-
- u32 link_speed;
- bool link_up;
- unsigned long link_check_timeout;
-
- struct timer_list service_timer;
- struct work_struct service_task;
-
- struct hlist_head fdir_filter_list;
- unsigned long fdir_overflow; /* number of times ATR was backed off */
- union ixgbe_atr_input fdir_mask;
- int fdir_filter_count;
- u32 fdir_pballoc;
- u32 atr_sample_rate;
- spinlock_t fdir_perfect_lock;
-
-#ifdef IXGBE_FCOE
- struct ixgbe_fcoe fcoe;
-#endif /* IXGBE_FCOE */
- u32 wol;
-
- u16 bd_number;
-
- char eeprom_id[32];
- u16 eeprom_cap;
- bool netdev_registered;
- u32 interrupt_event;
-#ifdef HAVE_ETHTOOL_SET_PHYS_ID
- u32 led_reg;
-#endif
-
- DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
- unsigned int num_vfs;
- struct vf_data_storage *vfinfo;
- int vf_rate_link_speed;
- struct vf_macvlans vf_mvs;
- struct vf_macvlans *mv_list;
-#ifdef CONFIG_PCI_IOV
- u32 timer_event_accumulator;
- u32 vferr_refcount;
-#endif
- struct ixgbe_mac_addr *mac_table;
-#ifdef IXGBE_SYSFS
- struct kobject *info_kobj;
- struct kobject *therm_kobj[IXGBE_MAX_SENSORS];
-#else /* IXGBE_SYSFS */
-#ifdef IXGBE_PROCFS
- struct proc_dir_entry *eth_dir;
- struct proc_dir_entry *info_dir;
- struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS];
- struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS];
-#endif /* IXGBE_PROCFS */
-#endif /* IXGBE_SYSFS */
-};
-
-struct ixgbe_fdir_filter {
- struct hlist_node fdir_node;
- union ixgbe_atr_input filter;
- u16 sw_idx;
- u16 action;
-};
-
-enum ixgbe_state_t {
- __IXGBE_TESTING,
- __IXGBE_RESETTING,
- __IXGBE_DOWN,
- __IXGBE_SERVICE_SCHED,
- __IXGBE_IN_SFP_INIT,
-};
-
-struct ixgbe_cb {
-#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
- union { /* Union defining head/tail partner */
- struct sk_buff *head;
- struct sk_buff *tail;
- };
-#endif
- dma_addr_t dma;
-#ifndef IXGBE_NO_LRO
- __be32 tsecr; /* timestamp echo response */
- u32 tsval; /* timestamp value in host order */
- u32 next_seq; /* next expected sequence number */
- u16 free; /* 65521 minus total size */
- u16 mss; /* size of data portion of packet */
-#endif /* IXGBE_NO_LRO */
-#ifdef HAVE_VLAN_RX_REGISTER
- u16 vid; /* VLAN tag */
-#endif
- u16 append_cnt; /* number of skb's appended */
-#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
- bool page_released;
-#endif
-};
-#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb)
-
-#ifdef IXGBE_SYSFS
-void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter);
-int ixgbe_sysfs_init(struct ixgbe_adapter *adapter);
-#endif /* IXGBE_SYSFS */
-#ifdef IXGBE_PROCFS
-void ixgbe_procfs_exit(struct ixgbe_adapter *adapter);
-int ixgbe_procfs_init(struct ixgbe_adapter *adapter);
-int ixgbe_procfs_topdir_init(void);
-void ixgbe_procfs_topdir_exit(void);
-#endif /* IXGBE_PROCFS */
-
-extern struct dcbnl_rtnl_ops dcbnl_ops;
-extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max);
-
-extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index);
-
-/* needed by ixgbe_main.c */
-extern int ixgbe_validate_mac_addr(u8 *mc_addr);
-extern void ixgbe_check_options(struct ixgbe_adapter *adapter);
-extern void ixgbe_assign_netdev_ops(struct net_device *netdev);
-
-/* needed by ixgbe_ethtool.c */
-extern char ixgbe_driver_name[];
-extern const char ixgbe_driver_version[];
-
-extern void ixgbe_up(struct ixgbe_adapter *adapter);
-extern void ixgbe_down(struct ixgbe_adapter *adapter);
-extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
-extern void ixgbe_reset(struct ixgbe_adapter *adapter);
-extern void ixgbe_set_ethtool_ops(struct net_device *netdev);
-extern int ixgbe_setup_rx_resources(struct ixgbe_ring *);
-extern int ixgbe_setup_tx_resources(struct ixgbe_ring *);
-extern void ixgbe_free_rx_resources(struct ixgbe_ring *);
-extern void ixgbe_free_tx_resources(struct ixgbe_ring *);
-extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *,
- struct ixgbe_ring *);
-extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *,
- struct ixgbe_ring *);
-extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
-extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
-extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
-extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev);
-extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *,
- struct ixgbe_adapter *,
- struct ixgbe_ring *);
-extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *,
- struct ixgbe_tx_buffer *);
-extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
-extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *);
-extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *);
-extern void ixgbe_set_rx_mode(struct net_device *netdev);
-extern int ixgbe_write_mc_addr_list(struct net_device *netdev);
-extern int ixgbe_setup_tc(struct net_device *dev, u8 tc);
-#ifdef IXGBE_FCOE
-extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32);
-#endif /* IXGBE_FCOE */
-extern void ixgbe_do_reset(struct net_device *netdev);
-extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector);
-extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *);
-extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter);
-extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter);
-#ifdef ETHTOOL_OPS_COMPAT
-extern int ethtool_ioctl(struct ifreq *ifr);
-#endif
-
-#ifdef IXGBE_FCOE
-extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter);
-extern int ixgbe_fso(struct ixgbe_ring *tx_ring,
- struct ixgbe_tx_buffer *first,
- u8 *hdr_len);
-extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter);
-extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb);
-extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc);
-#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
-extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc);
-#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */
-extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid);
-#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE
-extern int ixgbe_fcoe_enable(struct net_device *netdev);
-extern int ixgbe_fcoe_disable(struct net_device *netdev);
-#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
-#ifdef CONFIG_DCB
-#ifdef HAVE_DCBNL_OPS_GETAPP
-extern u8 ixgbe_fcoe_getapp(struct net_device *netdev);
-#endif /* HAVE_DCBNL_OPS_GETAPP */
-extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up);
-#endif /* CONFIG_DCB */
-#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN
-extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type);
-#endif
-#endif /* IXGBE_FCOE */
-
-#ifdef CONFIG_DCB
-#ifdef HAVE_DCBNL_IEEE
-s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame);
-#endif /* HAVE_DCBNL_IEEE */
-#endif /* CONFIG_DCB */
-
-extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring);
-extern int ixgbe_get_settings(struct net_device *netdev,
- struct ethtool_cmd *ecmd);
-extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
- struct net_device *netdev, unsigned int vfn);
-extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
-extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
- u8 *addr, u16 queue);
-extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
- u8 *addr, u16 queue);
-extern int ixgbe_available_rars(struct ixgbe_adapter *adapter);
-#ifndef HAVE_VLAN_RX_REGISTER
-extern void ixgbe_vlan_mode(struct net_device *, u32);
-#endif
-#ifndef ixgbe_get_netdev_tc_txq
-#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc])
-#endif
-extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter);
-#endif /* _IXGBE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
deleted file mode 100644
index 242de671..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
+++ /dev/null
@@ -1,1281 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_type.h"
-#include "ixgbe_82598.h"
-#include "ixgbe_api.h"
-#include "ixgbe_common.h"
-#include "ixgbe_phy.h"
-
-static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *autoneg);
-static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw);
-static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
- bool autoneg_wait_to_complete);
-static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed, bool *link_up,
- bool link_up_wait_to_complete);
-static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete);
-static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete);
-static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw);
-static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw);
-static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
- u32 headroom, int strategy);
-
-/**
- * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout
- * @hw: pointer to the HW structure
- *
- * The defaults for 82598 should be in the range of 50us to 50ms;
- * however, the hardware default for these parts is 500us to 1ms, which is
- * less than the 10ms recommended by the pci-e spec. To address this we need
- * to increase the value to either 10ms-250ms for capability version 1 config,
- * or 16ms-55ms for version 2.
- **/
-void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw)
-{
- u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR);
- u16 pcie_devctl2;
-
- /* only take action if timeout value is defaulted to 0 */
- if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK)
- goto out;
-
- /*
-	 * if the capabilities version is type 1 we can write the
- * timeout of 10ms to 250ms through the GCR register
- */
- if (!(gcr & IXGBE_GCR_CAP_VER2)) {
- gcr |= IXGBE_GCR_CMPL_TMOUT_10ms;
- goto out;
- }
-
- /*
- * for version 2 capabilities we need to write the config space
- * directly in order to set the completion timeout value for
- * 16ms to 55ms
- */
- pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2);
- pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms;
- IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2);
-out:
- /* disable completion timeout resend */
- gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND;
- IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr);
-}
-
-/**
- * ixgbe_init_ops_82598 - Inits func ptrs and MAC type
- * @hw: pointer to hardware structure
- *
- * Initialize the function pointers and assign the MAC type for 82598.
- * Does not touch the hardware.
- **/
-s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val;
-
- ret_val = ixgbe_init_phy_ops_generic(hw);
- ret_val = ixgbe_init_ops_generic(hw);
-
- /* PHY */
- phy->ops.init = &ixgbe_init_phy_ops_82598;
-
- /* MAC */
- mac->ops.start_hw = &ixgbe_start_hw_82598;
- mac->ops.reset_hw = &ixgbe_reset_hw_82598;
- mac->ops.get_media_type = &ixgbe_get_media_type_82598;
- mac->ops.get_supported_physical_layer =
- &ixgbe_get_supported_physical_layer_82598;
- mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598;
- mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598;
- mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598;
-
- /* RAR, Multicast, VLAN */
- mac->ops.set_vmdq = &ixgbe_set_vmdq_82598;
- mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598;
- mac->ops.set_vfta = &ixgbe_set_vfta_82598;
- mac->ops.set_vlvf = NULL;
- mac->ops.clear_vfta = &ixgbe_clear_vfta_82598;
-
- /* Flow Control */
- mac->ops.fc_enable = &ixgbe_fc_enable_82598;
-
- mac->mcft_size = 128;
- mac->vft_size = 128;
- mac->num_rar_entries = 16;
- mac->rx_pb_size = 512;
- mac->max_tx_queues = 32;
- mac->max_rx_queues = 64;
- mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
-
- /* SFP+ Module */
- phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598;
-
- /* Link */
- mac->ops.check_link = &ixgbe_check_mac_link_82598;
- mac->ops.setup_link = &ixgbe_setup_mac_link_82598;
- mac->ops.flap_tx_laser = NULL;
- mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598;
- mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598;
-
- /* Manageability interface */
- mac->ops.set_fw_drv_ver = NULL;
-
- return ret_val;
-}
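
For illustration only (not part of the driver or this diff): init_ops routines like the one above populate a table of function pointers that callers later dispatch through, checking for NULL where an op is optional. A minimal sketch of that pattern, with all names hypothetical:

/* Illustrative ops-table sketch; demo_* names are made up. */
#include <stdio.h>

struct demo_hw;                               /* forward declaration */

struct demo_mac_ops {
	int (*start_hw)(struct demo_hw *hw);  /* NULL means "not supported" */
	int (*reset_hw)(struct demo_hw *hw);
};

struct demo_hw {
	struct demo_mac_ops ops;
};

static int demo_start_hw_82598(struct demo_hw *hw)
{
	(void)hw;
	printf("82598 start_hw called\n");
	return 0;
}

int main(void)
{
	struct demo_hw hw = { .ops = { .start_hw = demo_start_hw_82598 } };

	/* Callers check the pointer before dispatching, as the driver does
	 * for optional ops such as set_vlvf or flap_tx_laser. */
	if (hw.ops.start_hw)
		return hw.ops.start_hw(&hw);
	return -1;
}
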
-
-/**
- * ixgbe_init_phy_ops_82598 - PHY/SFP specific init
- * @hw: pointer to hardware structure
- *
- * Initialize any function pointers that were not able to be
- * set during init_shared_code because the PHY/SFP type was
- * not known. Perform the SFP init if necessary.
- *
- **/
-s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val = 0;
- u16 list_offset, data_offset;
-
- /* Identify the PHY */
- phy->ops.identify(hw);
-
- /* Overwrite the link function pointers if copper PHY */
- if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
- mac->ops.setup_link = &ixgbe_setup_copper_link_82598;
- mac->ops.get_link_capabilities =
- &ixgbe_get_copper_link_capabilities_generic;
- }
-
- switch (hw->phy.type) {
- case ixgbe_phy_tn:
- phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
- phy->ops.check_link = &ixgbe_check_phy_link_tnx;
- phy->ops.get_firmware_version =
- &ixgbe_get_phy_firmware_version_tnx;
- break;
- case ixgbe_phy_nl:
- phy->ops.reset = &ixgbe_reset_phy_nl;
-
- /* Call SFP+ identify routine to get the SFP+ module type */
- ret_val = phy->ops.identify_sfp(hw);
- if (ret_val != 0)
- goto out;
- else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) {
- ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
- goto out;
- }
-
- /* Check to see if SFP+ module is supported */
- ret_val = ixgbe_get_sfp_init_sequence_offsets(hw,
- &list_offset,
- &data_offset);
- if (ret_val != 0) {
- ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
- goto out;
- }
- break;
- default:
- break;
- }
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx
- * @hw: pointer to hardware structure
- *
- * Starts the hardware using the generic start_hw function.
- * Disables relaxed ordering, then sets the PCIe completion timeout.
- *
- **/
-s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
-{
- u32 regval;
- u32 i;
- s32 ret_val = 0;
-
- ret_val = ixgbe_start_hw_generic(hw);
-
- /* Disable relaxed ordering */
- for (i = 0; ((i < hw->mac.max_tx_queues) &&
- (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
- regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
- regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
- IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
- }
-
- for (i = 0; ((i < hw->mac.max_rx_queues) &&
- (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
- regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
- regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
- IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
- IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
- }
-
- /* set the completion timeout for interface */
- if (ret_val == 0)
- ixgbe_set_pcie_completion_timeout(hw);
-
- return ret_val;
-}
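
For illustration only: the per-queue loops above follow the usual read-modify-write idiom for clearing a control bit. A standalone sketch of that idiom against an in-memory register mirror; the array and bit position are stand-ins, not the real ixgbe register map.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NUM_QUEUES   4
#define DEMO_DESC_WRO_EN  (1u << 11)   /* hypothetical bit position */

static uint32_t demo_txctrl[DEMO_NUM_QUEUES] = {
	0xFFFFFFFFu, 0x00000800u, 0x0u, 0x12345FFFu
};

int main(void)
{
	for (int i = 0; i < DEMO_NUM_QUEUES; i++) {
		uint32_t regval = demo_txctrl[i];     /* read   */
		regval &= ~DEMO_DESC_WRO_EN;          /* modify: clear the bit */
		demo_txctrl[i] = regval;              /* write  */
		printf("queue %d: 0x%08X\n", i, demo_txctrl[i]);
	}
	return 0;
}
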
-
-/**
- * ixgbe_get_link_capabilities_82598 - Determines link capabilities
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @autoneg: boolean auto-negotiation value
- *
- * Determines the link capabilities by reading the AUTOC register.
- **/
-static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *autoneg)
-{
- s32 status = 0;
- u32 autoc = 0;
-
- /*
- * Determine link capabilities based on the stored value of AUTOC,
- * which represents EEPROM defaults. If AUTOC value has not been
- * stored, use the current register value.
- */
- if (hw->mac.orig_link_settings_stored)
- autoc = hw->mac.orig_autoc;
- else
- autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
-
- switch (autoc & IXGBE_AUTOC_LMS_MASK) {
- case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- *autoneg = false;
- break;
-
- case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
- *autoneg = false;
- break;
-
- case IXGBE_AUTOC_LMS_1G_AN:
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- *autoneg = true;
- break;
-
- case IXGBE_AUTOC_LMS_KX4_AN:
- case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
- *speed = IXGBE_LINK_SPEED_UNKNOWN;
- if (autoc & IXGBE_AUTOC_KX4_SUPP)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (autoc & IXGBE_AUTOC_KX_SUPP)
- *speed |= IXGBE_LINK_SPEED_1GB_FULL;
- *autoneg = true;
- break;
-
- default:
- status = IXGBE_ERR_LINK_SETUP;
- break;
- }
-
- return status;
-}
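
For illustration only: link speeds here are bit flags, which is why the KX4/KX case can OR both 1G and 10G into *speed, and why a requested speed can later be masked against the reported capabilities. A tiny sketch of that masking, with hypothetical flag encodings.

#include <stdint.h>
#include <stdio.h>

#define DEMO_SPEED_UNKNOWN   0x0u
#define DEMO_SPEED_1GB_FULL  0x1u    /* hypothetical encodings */
#define DEMO_SPEED_10GB_FULL 0x2u

int main(void)
{
	uint32_t capabilities = DEMO_SPEED_1GB_FULL | DEMO_SPEED_10GB_FULL;
	uint32_t requested = DEMO_SPEED_10GB_FULL;

	requested &= capabilities;   /* drop unsupported speeds */
	if (requested == DEMO_SPEED_UNKNOWN)
		printf("link setup error: no supported speed requested\n");
	else
		printf("negotiable speeds: 0x%x\n", requested);
	return 0;
}
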
-
-/**
- * ixgbe_get_media_type_82598 - Determines media type
- * @hw: pointer to hardware structure
- *
- * Returns the media type (fiber, copper, backplane)
- **/
-static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw)
-{
- enum ixgbe_media_type media_type;
-
- /* Detect if there is a copper PHY attached. */
- switch (hw->phy.type) {
- case ixgbe_phy_cu_unknown:
- case ixgbe_phy_tn:
- media_type = ixgbe_media_type_copper;
- goto out;
- default:
- break;
- }
-
- /* Media type for I82598 is based on device ID */
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82598:
- case IXGBE_DEV_ID_82598_BX:
- /* Default device ID is mezzanine card KX/KX4 */
- media_type = ixgbe_media_type_backplane;
- break;
- case IXGBE_DEV_ID_82598AF_DUAL_PORT:
- case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
- case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
- case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
- case IXGBE_DEV_ID_82598EB_XF_LR:
- case IXGBE_DEV_ID_82598EB_SFP_LOM:
- media_type = ixgbe_media_type_fiber;
- break;
- case IXGBE_DEV_ID_82598EB_CX4:
- case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
- media_type = ixgbe_media_type_cx4;
- break;
- case IXGBE_DEV_ID_82598AT:
- case IXGBE_DEV_ID_82598AT2:
- media_type = ixgbe_media_type_copper;
- break;
- default:
- media_type = ixgbe_media_type_unknown;
- break;
- }
-out:
- return media_type;
-}
-
-/**
- * ixgbe_fc_enable_82598 - Enable flow control
- * @hw: pointer to hardware structure
- *
- * Enable flow control according to the current settings.
- **/
-s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
- u32 fctrl_reg;
- u32 rmcs_reg;
- u32 reg;
- u32 fcrtl, fcrth;
- u32 link_speed = 0;
- int i;
- bool link_up;
-
- /* Validate the water mark configuration */
- if (!hw->fc.pause_time) {
- ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
- goto out;
- }
-
- /* Low water mark of zero causes XOFF floods */
- for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
- if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
- hw->fc.high_water[i]) {
- if (!hw->fc.low_water[i] ||
- hw->fc.low_water[i] >= hw->fc.high_water[i]) {
- hw_dbg(hw, "Invalid water mark configuration\n");
- ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
- goto out;
- }
- }
- }
-
- /*
- * On the 82598, having Rx flow control enabled causes resets while
- * operating at 1G, so if it is on, turn it off once link_speed is
- * known. For more details see the 82598 Specification Update.
- */
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
- if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) {
- switch (hw->fc.requested_mode) {
- case ixgbe_fc_full:
- hw->fc.requested_mode = ixgbe_fc_tx_pause;
- break;
- case ixgbe_fc_rx_pause:
- hw->fc.requested_mode = ixgbe_fc_none;
- break;
- default:
- /* no change */
- break;
- }
- }
-
- /* Negotiate the fc mode to use */
- ixgbe_fc_autoneg(hw);
-
- /* Disable any previous flow control settings */
- fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE);
-
- rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
- rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X);
-
- /*
- * The possible values of fc.current_mode are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause frames,
- * but not send pause frames).
- * 2: Tx flow control is enabled (we can send pause frames but
- * we do not support receiving pause frames).
- * 3: Both Rx and Tx flow control (symmetric) are enabled.
- * other: Invalid.
- */
- switch (hw->fc.current_mode) {
- case ixgbe_fc_none:
- /*
- * Flow control is disabled by software override or autoneg.
- * The code below will actually disable it in the HW.
- */
- break;
- case ixgbe_fc_rx_pause:
- /*
- * Rx Flow control is enabled and Tx Flow control is
- * disabled by software override. Since there really
- * isn't a way to advertise that we are capable of RX
- * Pause ONLY, we will advertise that we support both
- * symmetric and asymmetric Rx PAUSE. Later, we will
- * disable the adapter's ability to send PAUSE frames.
- */
- fctrl_reg |= IXGBE_FCTRL_RFCE;
- break;
- case ixgbe_fc_tx_pause:
- /*
- * Tx Flow control is enabled, and Rx Flow control is
- * disabled by software override.
- */
- rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
- break;
- case ixgbe_fc_full:
- /* Flow control (both Rx and Tx) is enabled by SW override. */
- fctrl_reg |= IXGBE_FCTRL_RFCE;
- rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
- break;
- default:
- hw_dbg(hw, "Flow control param set incorrectly\n");
- ret_val = IXGBE_ERR_CONFIG;
- goto out;
- break;
- }
-
- /* Set 802.3x based flow control settings. */
- fctrl_reg |= IXGBE_FCTRL_DPF;
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg);
- IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg);
-
- /* Set up and enable Rx high/low water mark thresholds, enable XON. */
- for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
- if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
- hw->fc.high_water[i]) {
- fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
- fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
- IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
- IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth);
- } else {
- IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0);
- }
-
- }
-
- /* Configure pause time (2 TCs per register) */
- reg = hw->fc.pause_time * 0x00010001;
- for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
- IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
-
- /* Configure flow control refresh threshold value */
- IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
-
-out:
- return ret_val;
-}
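
For illustration only: two register-packing details from the flow control setup above. Multiplying a 16-bit pause time by 0x00010001 duplicates it into both halves of a 32-bit word (two traffic classes per FCTTV register), and the watermark shift by 10 appears to scale a KB-granularity watermark before the enable bit is OR-ed in. Constants below are stand-ins.

#include <stdint.h>
#include <stdio.h>

#define DEMO_FCRTH_FCEN  (1u << 31)   /* hypothetical enable bit */

int main(void)
{
	uint16_t pause_time = 0x0680;
	uint32_t fcttv = (uint32_t)pause_time * 0x00010001u;  /* 0x06800680 */

	uint32_t high_water_kb = 0x20;
	uint32_t fcrth = (high_water_kb << 10) | DEMO_FCRTH_FCEN;

	printf("FCTTV = 0x%08X (pause time in both halves)\n", fcttv);
	printf("FCRTH = 0x%08X\n", fcrth);
	return 0;
}
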
-
-/**
- * ixgbe_start_mac_link_82598 - Configures MAC link settings
- * @hw: pointer to hardware structure
- *
- * Configures link settings based on values in the ixgbe_hw struct.
- * Restarts the link. Performs autonegotiation if needed.
- **/
-static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
- bool autoneg_wait_to_complete)
-{
- u32 autoc_reg;
- u32 links_reg;
- u32 i;
- s32 status = 0;
-
- /* Restart link */
- autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- autoc_reg |= IXGBE_AUTOC_AN_RESTART;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
-
- /* Only poll for autoneg to complete if specified to do so */
- if (autoneg_wait_to_complete) {
- if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
- IXGBE_AUTOC_LMS_KX4_AN ||
- (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
- IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
- links_reg = 0; /* Just in case Autoneg time = 0 */
- for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
- if (links_reg & IXGBE_LINKS_KX_AN_COMP)
- break;
- msleep(100);
- }
- if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
- status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
- hw_dbg(hw, "Autonegotiation did not complete.\n");
- }
- }
- }
-
- /* Add delay to filter out noise during initial link setup */
- msleep(50);
-
- return status;
-}
-
-/**
- * ixgbe_validate_link_ready - Function looks for phy link
- * @hw: pointer to hardware structure
- *
- * Function indicates success when phy link is available. If phy is not ready
- * within 5 seconds of the MAC indicating link, the function returns an error.
- **/
-static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
-{
- u32 timeout;
- u16 an_reg;
-
- if (hw->device_id != IXGBE_DEV_ID_82598AT2)
- return 0;
-
- for (timeout = 0;
- timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) {
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg);
-
- if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) &&
- (an_reg & IXGBE_MII_AUTONEG_LINK_UP))
- break;
-
- msleep(100);
- }
-
- if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) {
- hw_dbg(hw, "Link was indicated but link is down\n");
- return IXGBE_ERR_LINK_SETUP;
- }
-
- return 0;
-}
-
-/**
- * ixgbe_check_mac_link_82598 - Get link/speed status
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @link_up: true if link is up, false otherwise
- * @link_up_wait_to_complete: bool used to wait for link up or not
- *
- * Reads the links register to determine if link is up and the current speed
- **/
-static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed, bool *link_up,
- bool link_up_wait_to_complete)
-{
- u32 links_reg;
- u32 i;
- u16 link_reg, adapt_comp_reg;
-
- /*
- * SERDES PHY requires us to read link status from undocumented
- * register 0xC79F. Bit 0 set indicates link is up/ready; clear
- * indicates link down. 0xC00C is read to check that the XAUI lanes
- * are active. Bit 0 clear indicates active; set indicates inactive.
- */
- if (hw->phy.type == ixgbe_phy_nl) {
- hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
- hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
- hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV,
- &adapt_comp_reg);
- if (link_up_wait_to_complete) {
- for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
- if ((link_reg & 1) &&
- ((adapt_comp_reg & 1) == 0)) {
- *link_up = true;
- break;
- } else {
- *link_up = false;
- }
- msleep(100);
- hw->phy.ops.read_reg(hw, 0xC79F,
- IXGBE_TWINAX_DEV,
- &link_reg);
- hw->phy.ops.read_reg(hw, 0xC00C,
- IXGBE_TWINAX_DEV,
- &adapt_comp_reg);
- }
- } else {
- if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0))
- *link_up = true;
- else
- *link_up = false;
- }
-
- if (*link_up == false)
- goto out;
- }
-
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
- if (link_up_wait_to_complete) {
- for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
- if (links_reg & IXGBE_LINKS_UP) {
- *link_up = true;
- break;
- } else {
- *link_up = false;
- }
- msleep(100);
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
- }
- } else {
- if (links_reg & IXGBE_LINKS_UP)
- *link_up = true;
- else
- *link_up = false;
- }
-
- if (links_reg & IXGBE_LINKS_SPEED)
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
- else
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
-
- if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) &&
- (ixgbe_validate_link_ready(hw) != 0))
- *link_up = false;
-
-out:
- return 0;
-}
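
For illustration only: the wait-to-complete paths above use a bounded polling loop — re-read a status source every ~100 ms until a bit is set or the retry budget runs out. A standalone sketch of that pattern; the status stub and bit are placeholders.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_LINK_UP_BIT  (1u << 30)
#define DEMO_MAX_POLLS    90          /* e.g. 90 * 100 ms = 9 s budget */

static uint32_t demo_read_links(int attempt)
{
	/* Pretend link comes up on the third read. */
	return (attempt >= 2) ? DEMO_LINK_UP_BIT : 0u;
}

int main(void)
{
	bool link_up = false;

	for (int i = 0; i < DEMO_MAX_POLLS; i++) {
		if (demo_read_links(i) & DEMO_LINK_UP_BIT) {
			link_up = true;
			break;
		}
		/* a real driver would sleep ~100 ms here (msleep(100)) */
	}
	printf("link_up = %s\n", link_up ? "true" : "false");
	return 0;
}
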
-
-/**
- * ixgbe_setup_mac_link_82598 - Set MAC link speed
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- *
- * Set the link speed in the AUTOC register and restarts link.
- **/
-static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status = 0;
- ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
- u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 autoc = curr_autoc;
- u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
-
- /* Check to see if speed passed in is supported. */
- ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
- speed &= link_capabilities;
-
- if (speed == IXGBE_LINK_SPEED_UNKNOWN)
- status = IXGBE_ERR_LINK_SETUP;
-
- /* Set KX4/KX support according to speed requested */
- else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN ||
- link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
- autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK;
- if (speed & IXGBE_LINK_SPEED_10GB_FULL)
- autoc |= IXGBE_AUTOC_KX4_SUPP;
- if (speed & IXGBE_LINK_SPEED_1GB_FULL)
- autoc |= IXGBE_AUTOC_KX_SUPP;
- if (autoc != curr_autoc)
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
- }
-
- if (status == 0) {
- /*
- * Setup and restart the link based on the new values in
- * ixgbe_hw This will write the AUTOC register based on the new
- * stored values
- */
- status = ixgbe_start_mac_link_82598(hw,
- autoneg_wait_to_complete);
- }
-
- return status;
-}
-
-
-/**
- * ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true if waiting is needed to complete
- *
- * Sets the link speed in the AUTOC register in the MAC and restarts link.
- **/
-static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status;
-
- /* Setup the PHY according to input speed */
- status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
- autoneg_wait_to_complete);
- /* Set up MAC */
- ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
-
- return status;
-}
-
-/**
- * ixgbe_reset_hw_82598 - Performs hardware reset
- * @hw: pointer to hardware structure
- *
- * Resets the hardware by resetting the transmit and receive units, masks and
- * clears all interrupts, performing a PHY reset, and performing a link (MAC)
- * reset.
- **/
-static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- s32 phy_status = 0;
- u32 ctrl;
- u32 gheccr;
- u32 i;
- u32 autoc;
- u8 analog_val;
-
- /* Call adapter stop to disable tx/rx and clear interrupts */
- status = hw->mac.ops.stop_adapter(hw);
- if (status != 0)
- goto reset_hw_out;
-
- /*
- * Power up the Atlas Tx lanes if they are currently powered down.
- * Atlas Tx lanes are powered down for MAC loopback tests, but
- * they are not automatically restored on reset.
- */
- hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val);
- if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) {
- /* Enable Tx Atlas so packets can be transmitted again */
- hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
- &analog_val);
- analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN;
- hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
- analog_val);
-
- hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
- &analog_val);
- analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
- hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
- analog_val);
-
- hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
- &analog_val);
- analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
- hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
- analog_val);
-
- hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
- &analog_val);
- analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
- hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
- analog_val);
- }
-
- /* Reset PHY */
- if (hw->phy.reset_disable == false) {
- /* PHY ops must be identified and initialized prior to reset */
-
- /* Init PHY and function pointers, perform SFP setup */
- phy_status = hw->phy.ops.init(hw);
- if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED)
- goto reset_hw_out;
- if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto mac_reset_top;
-
- hw->phy.ops.reset(hw);
- }
-
-mac_reset_top:
- /*
- * Issue global reset to the MAC. This needs to be a SW reset.
- * If a link reset is used, it might reset the MAC while the manageability (MNG) firmware is using it
- */
- ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
- IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Poll for reset bit to self-clear indicating reset is complete */
- for (i = 0; i < 10; i++) {
- udelay(1);
- ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
- if (!(ctrl & IXGBE_CTRL_RST))
- break;
- }
- if (ctrl & IXGBE_CTRL_RST) {
- status = IXGBE_ERR_RESET_FAILED;
- hw_dbg(hw, "Reset polling failed to complete.\n");
- }
-
- msleep(50);
-
- /*
- * Double resets are required for recovery from certain error
- * conditions. Between resets, it is necessary to stall to allow time
- * for any pending HW events to complete.
- */
- if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
- hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
- goto mac_reset_top;
- }
-
- gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR);
- gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6));
- IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr);
-
- /*
- * Store the original AUTOC value if it has not been
- * stored off yet. Otherwise restore the stored original
- * AUTOC value since the reset operation sets it back to defaults.
- */
- autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- if (hw->mac.orig_link_settings_stored == false) {
- hw->mac.orig_autoc = autoc;
- hw->mac.orig_link_settings_stored = true;
- } else if (autoc != hw->mac.orig_autoc) {
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc);
- }
-
- /* Store the permanent mac address */
- hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
-
- /*
- * Store MAC address from RAR0, clear receive address registers, and
- * clear the multicast table
- */
- hw->mac.ops.init_rx_addrs(hw);
-
-reset_hw_out:
- if (phy_status != 0)
- status = phy_status;
-
- return status;
-}
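
For illustration only: the "double reset required" handling above clears a flag and jumps back to redo the MAC reset exactly once. The same control flow expressed as a bounded loop, with stand-in names.

#include <stdbool.h>
#include <stdio.h>

#define DEMO_FLAG_DOUBLE_RESET  0x1u

static void demo_issue_mac_reset(void)
{
	printf("MAC reset issued\n");
}

int main(void)
{
	unsigned int flags = DEMO_FLAG_DOUBLE_RESET;
	bool reset_again = true;

	while (reset_again) {
		demo_issue_mac_reset();
		if (flags & DEMO_FLAG_DOUBLE_RESET) {
			flags &= ~DEMO_FLAG_DOUBLE_RESET;  /* only repeat once */
			reset_again = true;
		} else {
			reset_again = false;
		}
	}
	return 0;
}
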
-
-/**
- * ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address
- * @hw: pointer to hardware struct
- * @rar: receive address register index to associate with a VMDq index
- * @vmdq: VMDq set index
- **/
-s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- u32 rar_high;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /* Make sure we are using a valid rar index range */
- if (rar >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", rar);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
- rar_high &= ~IXGBE_RAH_VIND_MASK;
- rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK);
- IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
- return 0;
-}
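
For illustration only: placing the VMDq pool index into RAH above is the standard field-insert idiom — clear the field with its mask, then OR in the shifted-and-masked new value. The mask and shift below are hypothetical stand-ins, not the real register layout.

#include <stdint.h>
#include <stdio.h>

#define DEMO_VIND_MASK   0x003C0000u   /* stand-in 4-bit field */
#define DEMO_VIND_SHIFT  18

int main(void)
{
	uint32_t rah = 0xDEADBEEFu;    /* pretend current register value */
	uint32_t vmdq = 5;

	rah &= ~DEMO_VIND_MASK;                           /* clear field  */
	rah |= (vmdq << DEMO_VIND_SHIFT) & DEMO_VIND_MASK; /* insert value */

	printf("RAH = 0x%08X, VIND field = %u\n",
	       rah, (rah & DEMO_VIND_MASK) >> DEMO_VIND_SHIFT);
	return 0;
}
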
-
-/**
- * ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address
- * @hw: pointer to hardware struct
- * @rar: receive address register index to associate with a VMDq index
- * @vmdq: VMDq clear index (not used in 82598, but elsewhere)
- **/
-static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- u32 rar_high;
- u32 rar_entries = hw->mac.num_rar_entries;
-
-
- /* Make sure we are using a valid rar index range */
- if (rar >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", rar);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
- if (rar_high & IXGBE_RAH_VIND_MASK) {
- rar_high &= ~IXGBE_RAH_VIND_MASK;
- IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_set_vfta_82598 - Set VLAN filter table
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- * @vind: VMDq output index that maps queue to VLAN id in VFTA
- * @vlan_on: boolean flag to turn on/off VLAN in VFTA
- *
- * Turn on/off specified VLAN in the VLAN filter table.
- **/
-s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
- bool vlan_on)
-{
- u32 regindex;
- u32 bitindex;
- u32 bits;
- u32 vftabyte;
-
- if (vlan > 4095)
- return IXGBE_ERR_PARAM;
-
- /* Determine 32-bit word position in array */
- regindex = (vlan >> 5) & 0x7F; /* upper seven bits */
-
- /* Determine the location of the (VMD) queue index */
- vftabyte = ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */
- bitindex = (vlan & 0x7) << 2; /* lower 3 bits indicate nibble */
-
- /* Set the nibble for VMD queue index */
- bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex));
- bits &= (~(0x0F << bitindex));
- bits |= (vind << bitindex);
- IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits);
-
- /* Determine the location of the bit for this VLAN id */
- bitindex = vlan & 0x1F; /* lower five bits */
-
- bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
- if (vlan_on)
- /* Turn on this VLAN id */
- bits |= (1 << bitindex);
- else
- /* Turn off this VLAN id */
- bits &= ~(1 << bitindex);
- IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits);
-
- return 0;
-}
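
For illustration only, a worked example of the index arithmetic used by ixgbe_set_vfta_82598() above (the shifts and masks are taken directly from the code; the sample VLAN id is arbitrary).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vlan = 1234;                       /* any id in 0..4095 */

	uint32_t regindex = (vlan >> 5) & 0x7F;     /* 32-bit VFTA word       */
	uint32_t vftabyte = (vlan >> 3) & 0x03;     /* VFTAVIND byte array    */
	uint32_t vind_bit = (vlan & 0x7) << 2;      /* nibble offset for VIND */
	uint32_t vfta_bit = vlan & 0x1F;            /* bit within VFTA word   */

	printf("vlan %u -> VFTA[%u] bit %u, VFTAVIND[%u][%u] nibble at bit %u\n",
	       vlan, regindex, vfta_bit, vftabyte, regindex, vind_bit);
	return 0;
}
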
-
-/**
- * ixgbe_clear_vfta_82598 - Clear VLAN filter table
- * @hw: pointer to hardware structure
- *
- * Clears the VLAN filter table, and the VMDq index associated with the filter
- **/
-static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
-{
- u32 offset;
- u32 vlanbyte;
-
- for (offset = 0; offset < hw->mac.vft_size; offset++)
- IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
-
- for (vlanbyte = 0; vlanbyte < 4; vlanbyte++)
- for (offset = 0; offset < hw->mac.vft_size; offset++)
- IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset),
- 0);
-
- return 0;
-}
-
-/**
- * ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register
- * @hw: pointer to hardware structure
- * @reg: analog register to read
- * @val: read value
- *
- * Performs read operation to Atlas analog register specified.
- **/
-s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
-{
- u32 atlas_ctl;
-
- IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL,
- IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
- IXGBE_WRITE_FLUSH(hw);
- udelay(10);
- atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
- *val = (u8)atlas_ctl;
-
- return 0;
-}
-
-/**
- * ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register
- * @hw: pointer to hardware structure
- * @reg: atlas register to write
- * @val: value to write
- *
- * Performs write operation to Atlas analog register specified.
- **/
-s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
-{
- u32 atlas_ctl;
-
- atlas_ctl = (reg << 8) | val;
- IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl);
- IXGBE_WRITE_FLUSH(hw);
- udelay(10);
-
- return 0;
-}
-
-/**
- * ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface.
- * @hw: pointer to hardware structure
- * @byte_offset: EEPROM byte offset to read
- * @eeprom_data: value read
- *
- * Performs an 8 bit read operation to the SFP module's EEPROM over the I2C interface.
- **/
-s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
- u8 *eeprom_data)
-{
- s32 status = 0;
- u16 sfp_addr = 0;
- u16 sfp_data = 0;
- u16 sfp_stat = 0;
- u32 i;
-
- if (hw->phy.type == ixgbe_phy_nl) {
- /*
- * NetLogic phy SDA/SCL registers are at addresses 0xC30A to
- * 0xC30D. These registers are used to talk to the SFP+
- * module's EEPROM through the SDA/SCL (I2C) interface.
- */
- sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset;
- sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK);
- hw->phy.ops.write_reg(hw,
- IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- sfp_addr);
-
- /* Poll status */
- for (i = 0; i < 100; i++) {
- hw->phy.ops.read_reg(hw,
- IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- &sfp_stat);
- sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK;
- if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS)
- break;
- msleep(10);
- }
-
- if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) {
- hw_dbg(hw, "EEPROM read did not pass.\n");
- status = IXGBE_ERR_SFP_NOT_PRESENT;
- goto out;
- }
-
- /* Read data */
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data);
-
- *eeprom_data = (u8)(sfp_data >> 8);
- } else {
- status = IXGBE_ERR_PHY;
- goto out;
- }
-
-out:
- return status;
-}
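
For illustration only: the SDA/SCL EEPROM read above packs the I2C device address and byte offset into one 16-bit command word, then takes the returned byte from the upper half of the 16-bit data word. A sketch of that packing and extraction; the device address, read flag and returned data word are made-up values.

#include <stdint.h>
#include <stdio.h>

#define DEMO_I2C_DEV_ADDR   0xA0u
#define DEMO_READ_MASK      0x100u    /* hypothetical read-request flag */

int main(void)
{
	uint8_t byte_offset = 0x60;

	/* Pack device address and byte offset into one 16-bit command word. */
	uint16_t sfp_addr = (uint16_t)((DEMO_I2C_DEV_ADDR << 8) + byte_offset);
	sfp_addr |= DEMO_READ_MASK;

	/* Pretend the PHY returned this data word; the byte of interest is in
	 * the upper 8 bits, as in the driver code above. */
	uint16_t sfp_data = 0x3A00;
	uint8_t eeprom_data = (uint8_t)(sfp_data >> 8);

	printf("cmd word = 0x%04X, eeprom byte = 0x%02X\n", sfp_addr, eeprom_data);
	return 0;
}
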
-
-/**
- * ixgbe_get_supported_physical_layer_82598 - Returns physical layer type
- * @hw: pointer to hardware structure
- *
- * Determines physical layer capabilities of the current configuration.
- **/
-u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw)
-{
- u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
- u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
- u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
- u16 ext_ability = 0;
-
- hw->phy.ops.identify(hw);
-
- /* Copper PHY must be checked before AUTOC LMS to determine correct
- * physical layer because 10GBase-T PHYs use LMS = KX4/KX */
- switch (hw->phy.type) {
- case ixgbe_phy_tn:
- case ixgbe_phy_cu_unknown:
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
- if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
- goto out;
- default:
- break;
- }
-
- switch (autoc & IXGBE_AUTOC_LMS_MASK) {
- case IXGBE_AUTOC_LMS_1G_AN:
- case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
- if (pma_pmd_1g == IXGBE_AUTOC_1G_KX)
- physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX;
- else
- physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX;
- break;
- case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
- if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
- else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
- else /* XAUI */
- physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
- break;
- case IXGBE_AUTOC_LMS_KX4_AN:
- case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
- if (autoc & IXGBE_AUTOC_KX_SUPP)
- physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
- if (autoc & IXGBE_AUTOC_KX4_SUPP)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
- break;
- default:
- break;
- }
-
- if (hw->phy.type == ixgbe_phy_nl) {
- hw->phy.ops.identify_sfp(hw);
-
- switch (hw->phy.sfp_type) {
- case ixgbe_sfp_type_da_cu:
- physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
- break;
- case ixgbe_sfp_type_sr:
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
- break;
- case ixgbe_sfp_type_lr:
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
- break;
- default:
- physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
- break;
- }
- }
-
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
- physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
- break;
- case IXGBE_DEV_ID_82598AF_DUAL_PORT:
- case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
- case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
- break;
- case IXGBE_DEV_ID_82598EB_XF_LR:
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
- break;
- default:
- break;
- }
-
-out:
- return physical_layer;
-}
-
-/**
- * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple
- * port devices.
- * @hw: pointer to the HW structure
- *
- * Calls common function and corrects issue with some single port devices
- * that enable LAN1 but not LAN0.
- **/
-void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw)
-{
- struct ixgbe_bus_info *bus = &hw->bus;
- u16 pci_gen = 0;
- u16 pci_ctrl2 = 0;
-
- ixgbe_set_lan_id_multi_port_pcie(hw);
-
- /* check if LAN0 is disabled */
- hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen);
- if ((pci_gen != 0) && (pci_gen != 0xFFFF)) {
-
- hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2);
-
- /* if LAN0 is completely disabled force function to 0 */
- if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) &&
- !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) &&
- !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) {
-
- bus->func = 0;
- }
- }
-}
-
-/**
- * ixgbe_set_rxpba_82598 - Initialize RX packet buffer
- * @hw: pointer to hardware structure
- * @num_pb: number of packet buffers to allocate
- * @headroom: reserve n KB of headroom
- * @strategy: packet buffer allocation strategy
- **/
-static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
- u32 headroom, int strategy)
-{
- u32 rxpktsize = IXGBE_RXPBSIZE_64KB;
- u8 i = 0;
-
- if (!num_pb)
- return;
-
- /* Setup Rx packet buffer sizes */
- switch (strategy) {
- case PBA_STRATEGY_WEIGHTED:
- /* Setup the first four at 80KB */
- rxpktsize = IXGBE_RXPBSIZE_80KB;
- for (; i < 4; i++)
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
- /* Setup the last four at 48KB...don't re-init i */
- rxpktsize = IXGBE_RXPBSIZE_48KB;
- /* Fall Through */
- case PBA_STRATEGY_EQUAL:
- default:
- /* Divide the remaining Rx packet buffer evenly among the TCs */
- for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
- break;
- }
-
- /* Setup Tx packet buffer sizes */
- for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++)
- IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB);
-
- return;
-}
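
For illustration only: the two Rx packet-buffer strategies above. Equal gives every buffer 64KB; weighted gives the first four 80KB and the remaining four 48KB, keeping the same 512KB total noted in init_ops (rx_pb_size = 512). Sizes below are plain integers in KB.

#include <stdio.h>

#define DEMO_NUM_BUFFERS 8

int main(void)
{
	int weighted = 1;
	int sizes_kb[DEMO_NUM_BUFFERS];
	int total = 0;

	for (int i = 0; i < DEMO_NUM_BUFFERS; i++) {
		if (weighted)
			sizes_kb[i] = (i < 4) ? 80 : 48;   /* weighted split  */
		else
			sizes_kb[i] = 64;                  /* equal split     */
		total += sizes_kb[i];
	}
	printf("total Rx packet buffer = %d KB\n", total);     /* 512 KB */
	return 0;
}
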
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
deleted file mode 100644
index 9a8c670a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_82598_H_
-#define _IXGBE_82598_H_
-
-u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw);
-s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw);
-s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on);
-s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val);
-s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val);
-s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
- u8 *eeprom_data);
-u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw);
-s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw);
-void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw);
-void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw);
-#endif /* _IXGBE_82598_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
deleted file mode 100644
index 3f159123..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
+++ /dev/null
@@ -1,2299 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_type.h"
-#include "ixgbe_82599.h"
-#include "ixgbe_api.h"
-#include "ixgbe_common.h"
-#include "ixgbe_phy.h"
-
-static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete);
-static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
-static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
- u16 offset, u16 *data);
-static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data);
-static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data);
-
-void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
-
- /* enable the laser control functions for SFP+ fiber */
- if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) {
- mac->ops.disable_tx_laser =
- &ixgbe_disable_tx_laser_multispeed_fiber;
- mac->ops.enable_tx_laser =
- &ixgbe_enable_tx_laser_multispeed_fiber;
- mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber;
-
- } else {
- mac->ops.disable_tx_laser = NULL;
- mac->ops.enable_tx_laser = NULL;
- mac->ops.flap_tx_laser = NULL;
- }
-
- if (hw->phy.multispeed_fiber) {
- /* Set up dual speed SFP+ support */
- mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber;
- } else {
- if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) &&
- (hw->phy.smart_speed == ixgbe_smart_speed_auto ||
- hw->phy.smart_speed == ixgbe_smart_speed_on) &&
- !ixgbe_verify_lesm_fw_enabled_82599(hw)) {
- mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed;
- } else {
- mac->ops.setup_link = &ixgbe_setup_mac_link_82599;
- }
- }
-}
-
-/**
- * ixgbe_init_phy_ops_82599 - PHY/SFP specific init
- * @hw: pointer to hardware structure
- *
- * Initialize any function pointers that were not able to be
- * set during init_shared_code because the PHY/SFP type was
- * not known. Perform the SFP init if necessary.
- *
- **/
-s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- struct ixgbe_phy_info *phy = &hw->phy;
- s32 ret_val = 0;
- u32 esdp;
-
- if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) {
- /* Store flag indicating I2C bus access control unit. */
- hw->phy.qsfp_shared_i2c_bus = TRUE;
-
- /* Initialize access to QSFP+ I2C bus */
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- esdp |= IXGBE_ESDP_SDP0_DIR;
- esdp &= ~IXGBE_ESDP_SDP1_DIR;
- esdp &= ~IXGBE_ESDP_SDP0;
- esdp &= ~IXGBE_ESDP_SDP0_NATIVE;
- esdp &= ~IXGBE_ESDP_SDP1_NATIVE;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_FLUSH(hw);
-
- phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599;
- phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599;
- }
- /* Identify the PHY or SFP module */
- ret_val = phy->ops.identify(hw);
- if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
- goto init_phy_ops_out;
-
- /* Setup function pointers based on detected SFP module and speeds */
- ixgbe_init_mac_link_ops_82599(hw);
- if (hw->phy.sfp_type != ixgbe_sfp_type_unknown)
- hw->phy.ops.reset = NULL;
-
- /* If copper media, overwrite with copper function pointers */
- if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
- mac->ops.setup_link = &ixgbe_setup_copper_link_82599;
- mac->ops.get_link_capabilities =
- &ixgbe_get_copper_link_capabilities_generic;
- }
-
- /* Set necessary function pointers based on phy type */
- switch (hw->phy.type) {
- case ixgbe_phy_tn:
- phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
- phy->ops.check_link = &ixgbe_check_phy_link_tnx;
- phy->ops.get_firmware_version =
- &ixgbe_get_phy_firmware_version_tnx;
- break;
- default:
- break;
- }
-init_phy_ops_out:
- return ret_val;
-}
-
-s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
- u32 reg_anlp1 = 0;
- u32 i = 0;
- u16 list_offset, data_offset, data_value;
-
- if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) {
- ixgbe_init_mac_link_ops_82599(hw);
-
- hw->phy.ops.reset = NULL;
-
- ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
- &data_offset);
- if (ret_val != 0)
- goto setup_sfp_out;
-
- /* PHY config will finish before releasing the semaphore */
- ret_val = hw->mac.ops.acquire_swfw_sync(hw,
- IXGBE_GSSR_MAC_CSR_SM);
- if (ret_val != 0) {
- ret_val = IXGBE_ERR_SWFW_SYNC;
- goto setup_sfp_out;
- }
-
- hw->eeprom.ops.read(hw, ++data_offset, &data_value);
- while (data_value != 0xffff) {
- IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
- IXGBE_WRITE_FLUSH(hw);
- hw->eeprom.ops.read(hw, ++data_offset, &data_value);
- }
-
- /* Release the semaphore */
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
- /* Delay obtaining semaphore again to allow FW access */
- msleep(hw->eeprom.semaphore_delay);
-
- /* Now restart DSP by setting Restart_AN and clearing LMS */
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw,
- IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) |
- IXGBE_AUTOC_AN_RESTART));
-
- /* Wait for AN to leave state 0 */
- for (i = 0; i < 10; i++) {
- msleep(4);
- reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1);
- if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)
- break;
- }
- if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) {
- hw_dbg(hw, "sfp module setup not complete\n");
- ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
- goto setup_sfp_out;
- }
-
- /* Restart DSP by setting Restart_AN and return to SFI mode */
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw,
- IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL |
- IXGBE_AUTOC_AN_RESTART));
- }
-
-setup_sfp_out:
- return ret_val;
-}
-
-/**
- * ixgbe_init_ops_82599 - Inits func ptrs and MAC type
- * @hw: pointer to hardware structure
- *
- * Initialize the function pointers and assign the MAC type for 82599.
- * Does not touch the hardware.
- **/
-
-s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- struct ixgbe_phy_info *phy = &hw->phy;
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- s32 ret_val;
-
- ixgbe_init_phy_ops_generic(hw);
- ret_val = ixgbe_init_ops_generic(hw);
-
- /* PHY */
- phy->ops.identify = &ixgbe_identify_phy_82599;
- phy->ops.init = &ixgbe_init_phy_ops_82599;
-
- /* MAC */
- mac->ops.reset_hw = &ixgbe_reset_hw_82599;
- mac->ops.get_media_type = &ixgbe_get_media_type_82599;
- mac->ops.get_supported_physical_layer =
- &ixgbe_get_supported_physical_layer_82599;
- mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
- mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
- mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599;
- mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599;
- mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599;
- mac->ops.start_hw = &ixgbe_start_hw_82599;
- mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
- mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
- mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
- mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
- mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
-
- /* RAR, Multicast, VLAN */
- mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
- mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
- mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
- mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
- mac->rar_highwater = 1;
- mac->ops.set_vfta = &ixgbe_set_vfta_generic;
- mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
- mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
- mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
- mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599;
- mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
- mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
-
- /* Link */
- mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599;
- mac->ops.check_link = &ixgbe_check_mac_link_generic;
- mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
- ixgbe_init_mac_link_ops_82599(hw);
-
- mac->mcft_size = 128;
- mac->vft_size = 128;
- mac->num_rar_entries = 128;
- mac->rx_pb_size = 512;
- mac->max_tx_queues = 128;
- mac->max_rx_queues = 128;
- mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
-
- mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
- IXGBE_FWSM_MODE_MASK) ? true : false;
-
- //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
-
- /* EEPROM */
- eeprom->ops.read = &ixgbe_read_eeprom_82599;
- eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599;
-
- /* Manageability interface */
- mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
-
- mac->ops.get_thermal_sensor_data =
- &ixgbe_get_thermal_sensor_data_generic;
- mac->ops.init_thermal_sensor_thresh =
- &ixgbe_init_thermal_sensor_thresh_generic;
-
- return ret_val;
-}
-
-/**
- * ixgbe_get_link_capabilities_82599 - Determines link capabilities
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @negotiation: true when autoneg or autotry is enabled
- *
- * Determines the link capabilities by reading the AUTOC register.
- **/
-s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *negotiation)
-{
- s32 status = 0;
- u32 autoc = 0;
-
- /* Check if 1G SFP module. */
- if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = true;
- goto out;
- }
-
- /*
- * Determine link capabilities based on the stored value of AUTOC,
- * which represents EEPROM defaults. If AUTOC value has not
- * been stored, use the current register values.
- */
- if (hw->mac.orig_link_settings_stored)
- autoc = hw->mac.orig_autoc;
- else
- autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
-
- switch (autoc & IXGBE_AUTOC_LMS_MASK) {
- case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = false;
- break;
-
- case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
- *negotiation = false;
- break;
-
- case IXGBE_AUTOC_LMS_1G_AN:
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = true;
- break;
-
- case IXGBE_AUTOC_LMS_10G_SERIAL:
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
- *negotiation = false;
- break;
-
- case IXGBE_AUTOC_LMS_KX4_KX_KR:
- case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
- *speed = IXGBE_LINK_SPEED_UNKNOWN;
- if (autoc & IXGBE_AUTOC_KR_SUPP)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (autoc & IXGBE_AUTOC_KX4_SUPP)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (autoc & IXGBE_AUTOC_KX_SUPP)
- *speed |= IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = true;
- break;
-
- case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII:
- *speed = IXGBE_LINK_SPEED_100_FULL;
- if (autoc & IXGBE_AUTOC_KR_SUPP)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (autoc & IXGBE_AUTOC_KX4_SUPP)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (autoc & IXGBE_AUTOC_KX_SUPP)
- *speed |= IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = true;
- break;
-
- case IXGBE_AUTOC_LMS_SGMII_1G_100M:
- *speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL;
- *negotiation = false;
- break;
-
- default:
- status = IXGBE_ERR_LINK_SETUP;
- goto out;
- break;
- }
-
- if (hw->phy.multispeed_fiber) {
- *speed |= IXGBE_LINK_SPEED_10GB_FULL |
- IXGBE_LINK_SPEED_1GB_FULL;
- *negotiation = true;
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_get_media_type_82599 - Get media type
- * @hw: pointer to hardware structure
- *
- * Returns the media type (fiber, copper, backplane)
- **/
-enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
-{
- enum ixgbe_media_type media_type;
-
- /* Detect if there is a copper PHY attached. */
- switch (hw->phy.type) {
- case ixgbe_phy_cu_unknown:
- case ixgbe_phy_tn:
- media_type = ixgbe_media_type_copper;
- goto out;
- default:
- break;
- }
-
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82599_KX4:
- case IXGBE_DEV_ID_82599_KX4_MEZZ:
- case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
- case IXGBE_DEV_ID_82599_KR:
- case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
- case IXGBE_DEV_ID_82599_XAUI_LOM:
- /* Default device ID is mezzanine card KX/KX4 */
- media_type = ixgbe_media_type_backplane;
- break;
- case IXGBE_DEV_ID_82599_SFP:
- case IXGBE_DEV_ID_82599_SFP_FCOE:
- case IXGBE_DEV_ID_82599_SFP_EM:
- case IXGBE_DEV_ID_82599_SFP_SF2:
- case IXGBE_DEV_ID_82599EN_SFP:
- media_type = ixgbe_media_type_fiber;
- break;
- case IXGBE_DEV_ID_82599_CX4:
- media_type = ixgbe_media_type_cx4;
- break;
- case IXGBE_DEV_ID_82599_T3_LOM:
- media_type = ixgbe_media_type_copper;
- break;
- case IXGBE_DEV_ID_82599_LS:
- media_type = ixgbe_media_type_fiber_lco;
- break;
- case IXGBE_DEV_ID_82599_QSFP_SF_QP:
- media_type = ixgbe_media_type_fiber_qsfp;
- break;
- default:
- media_type = ixgbe_media_type_unknown;
- break;
- }
-out:
- return media_type;
-}
-
-/**
- * ixgbe_start_mac_link_82599 - Setup MAC link settings
- * @hw: pointer to hardware structure
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- *
- * Configures link settings based on values in the ixgbe_hw struct.
- * Restarts the link. Performs autonegotiation if needed.
- **/
-s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
- bool autoneg_wait_to_complete)
-{
- u32 autoc_reg;
- u32 links_reg = 0;
- u32 i;
- s32 status = 0;
-
- /* Restart link */
- autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- autoc_reg |= IXGBE_AUTOC_AN_RESTART;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
-
- /* Only poll for autoneg to complete if specified to do so */
- if (autoneg_wait_to_complete) {
- if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
- IXGBE_AUTOC_LMS_KX4_KX_KR ||
- (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
- IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
- (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
- IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
- for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
- if (links_reg & IXGBE_LINKS_KX_AN_COMP)
- break;
- msleep(100);
- }
- if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
- status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
- hw_dbg(hw, "Autoneg did not complete.\n");
- }
- }
- }
-
- /* Add delay to filter out noise during initial link setup */
- msleep(50);
-
- return status;
-}
-
-/**
- * ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser
- * @hw: pointer to hardware structure
- *
- * The base drivers may require better control over SFP+ module
- * PHY states. This includes selectively shutting down the Tx
- * laser on the PHY, effectively halting physical link.
- **/
-void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
-{
- u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
-
- /* Disable tx laser; allow 100us to go dark per spec */
- esdp_reg |= IXGBE_ESDP_SDP3;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
- IXGBE_WRITE_FLUSH(hw);
- udelay(100);
-}
-
-/**
- * ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser
- * @hw: pointer to hardware structure
- *
- * The base drivers may require better control over SFP+ module
- * PHY states. This includes selectively turning on the Tx
- * laser on the PHY, effectively starting physical link.
- **/
-void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
-{
- u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
-
- /* Enable tx laser; allow 100ms to light up */
- esdp_reg &= ~IXGBE_ESDP_SDP3;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
- IXGBE_WRITE_FLUSH(hw);
- msleep(100);
-}
-
-/**
- * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser
- * @hw: pointer to hardware structure
- *
- * When the driver changes the link speeds that it can support,
- * it sets autotry_restart to true to indicate that we need to
- * initiate a new autotry session with the link partner. To do
- * so, we set the speed then disable and re-enable the tx laser, to
- * alert the link partner that it also needs to restart autotry on its
- * end. This is consistent with true clause 37 autoneg, which also
- * involves a loss of signal.
- **/
-void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
-{
- if (hw->mac.autotry_restart) {
- ixgbe_disable_tx_laser_multispeed_fiber(hw);
- ixgbe_enable_tx_laser_multispeed_fiber(hw);
- hw->mac.autotry_restart = false;
- }
-}
-
-/**
- * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- *
- * Set the link speed in the AUTOC register and restarts link.
- **/
-s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status = 0;
- ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
- ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
- u32 speedcnt = 0;
- u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
- u32 i = 0;
- bool link_up = false;
- bool negotiation;
-
- /* Mask off requested but non-supported speeds */
- status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation);
- if (status != 0)
- return status;
-
- speed &= link_speed;
-
- /*
- * Try each speed one by one, highest priority first. We do this in
- * software because 10gb fiber doesn't support speed autonegotiation.
- */
- if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
- speedcnt++;
- highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL;
-
- /* If we already have link at this speed, just jump out */
- status = ixgbe_check_link(hw, &link_speed, &link_up, false);
- if (status != 0)
- return status;
-
- if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up)
- goto out;
-
- /* Set the module link speed */
- esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Allow module to change analog characteristics (1G->10G) */
- msleep(40);
-
- status = ixgbe_setup_mac_link_82599(hw,
- IXGBE_LINK_SPEED_10GB_FULL,
- autoneg,
- autoneg_wait_to_complete);
- if (status != 0)
- return status;
-
- /* Flap the tx laser if it has not already been done */
- ixgbe_flap_tx_laser(hw);
-
- /*
- * Wait for the controller to acquire link. Per IEEE 802.3ap,
- * Section 73.10.2, we may have to wait up to 500ms if KR is
- * attempted. 82599 uses the same timing for 10g SFI.
- */
- for (i = 0; i < 5; i++) {
- /* Wait for the link partner to also set speed */
- msleep(100);
-
- /* If we have link, just jump out */
- status = ixgbe_check_link(hw, &link_speed,
- &link_up, false);
- if (status != 0)
- return status;
-
- if (link_up)
- goto out;
- }
- }
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
- speedcnt++;
- if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN)
- highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL;
-
- /* If we already have link at this speed, just jump out */
- status = ixgbe_check_link(hw, &link_speed, &link_up, false);
- if (status != 0)
- return status;
-
- if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up)
- goto out;
-
- /* Set the module link speed */
- esdp_reg &= ~IXGBE_ESDP_SDP5;
- esdp_reg |= IXGBE_ESDP_SDP5_DIR;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Allow module to change analog characteristics (10G->1G) */
- msleep(40);
-
- status = ixgbe_setup_mac_link_82599(hw,
- IXGBE_LINK_SPEED_1GB_FULL,
- autoneg,
- autoneg_wait_to_complete);
- if (status != 0)
- return status;
-
- /* Flap the tx laser if it has not already been done */
- ixgbe_flap_tx_laser(hw);
-
- /* Wait for the link partner to also set speed */
- msleep(100);
-
- /* If we have link, just jump out */
- status = ixgbe_check_link(hw, &link_speed, &link_up, false);
- if (status != 0)
- return status;
-
- if (link_up)
- goto out;
- }
-
- /*
- * We didn't get link. Configure back to the highest speed we tried
- * (if there was more than one). We call ourselves back with just the
- * single highest speed that the user requested.
- */
- if (speedcnt > 1)
- status = ixgbe_setup_mac_link_multispeed_fiber(hw,
- highest_link_speed, autoneg, autoneg_wait_to_complete);
-
-out:
- /* Set autoneg_advertised value based on input link speed */
- hw->phy.autoneg_advertised = 0;
-
- if (speed & IXGBE_LINK_SPEED_10GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
-
- return status;
-}
-
-/**
- * ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- *
- * Implements the Intel SmartSpeed algorithm.
- **/
-s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status = 0;
- ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
- s32 i, j;
- bool link_up = false;
- u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
-
- /* Set autoneg_advertised value based on input link speed */
- hw->phy.autoneg_advertised = 0;
-
- if (speed & IXGBE_LINK_SPEED_10GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
-
- if (speed & IXGBE_LINK_SPEED_100_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
-
- /*
- * Implement Intel SmartSpeed algorithm. SmartSpeed will reduce the
- * autoneg advertisement if link is unable to be established at the
- * highest negotiated rate. This can sometimes happen due to integrity
- * issues with the physical media connection.
- */
-
- /* First, try to get link with full advertisement */
- hw->phy.smart_speed_active = false;
- for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) {
- status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
- autoneg_wait_to_complete);
- if (status != 0)
- goto out;
-
- /*
- * Wait for the controller to acquire link. Per IEEE 802.3ap,
- * Section 73.10.2, we may have to wait up to 500ms if KR is
- * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per
- * Table 9 in the AN MAS.
- */
- for (i = 0; i < 5; i++) {
- msleep(100);
-
- /* If we have link, just jump out */
- status = ixgbe_check_link(hw, &link_speed, &link_up,
- false);
- if (status != 0)
- goto out;
-
- if (link_up)
- goto out;
- }
- }
-
- /*
- * We didn't get link. If we advertised KR plus one of KX4/KX
- * (or BX4/BX), then disable KR and try again.
- */
- if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) ||
- ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0))
- goto out;
-
- /* Turn SmartSpeed on to disable KR support */
- hw->phy.smart_speed_active = true;
- status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
- autoneg_wait_to_complete);
- if (status != 0)
- goto out;
-
- /*
- * Wait for the controller to acquire link. 600ms will allow for
-	 * the AN link_fail_inhibit_timer as well as for multiple cycles of
- * parallel detect, both 10g and 1g. This allows for the maximum
- * connect attempts as defined in the AN MAS table 73-7.
- */
- for (i = 0; i < 6; i++) {
- msleep(100);
-
- /* If we have link, just jump out */
- status = ixgbe_check_link(hw, &link_speed, &link_up, false);
- if (status != 0)
- goto out;
-
- if (link_up)
- goto out;
- }
-
- /* We didn't get link. Turn SmartSpeed back off. */
- hw->phy.smart_speed_active = false;
- status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
- autoneg_wait_to_complete);
-
-out:
- if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL))
- hw_dbg(hw, "Smartspeed has downgraded the link speed "
- "from the maximum advertised\n");
- return status;
-}
-
-/**
- * ixgbe_setup_mac_link_82599 - Set MAC link speed
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- *
- * Set the link speed in the AUTOC register and restarts link.
- **/
-s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status = 0;
- u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
- u32 start_autoc = autoc;
- u32 orig_autoc = 0;
- u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
- u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
- u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
- u32 links_reg = 0;
- u32 i;
- ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
-
- /* Check to see if speed passed in is supported. */
- status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
- if (status != 0)
- goto out;
-
- speed &= link_capabilities;
-
- if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
- status = IXGBE_ERR_LINK_SETUP;
- goto out;
- }
-
-	/* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support */
- if (hw->mac.orig_link_settings_stored)
- orig_autoc = hw->mac.orig_autoc;
- else
- orig_autoc = autoc;
-
- if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
- link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
- link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
- /* Set KX4/KX/KR support according to speed requested */
- autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
- if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
- if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
- autoc |= IXGBE_AUTOC_KX4_SUPP;
- if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) &&
- (hw->phy.smart_speed_active == false))
- autoc |= IXGBE_AUTOC_KR_SUPP;
- }
- if (speed & IXGBE_LINK_SPEED_1GB_FULL)
- autoc |= IXGBE_AUTOC_KX_SUPP;
- } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
- (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
- link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
- /* Switch from 1G SFI to 10G SFI if requested */
- if ((speed == IXGBE_LINK_SPEED_10GB_FULL) &&
- (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) {
- autoc &= ~IXGBE_AUTOC_LMS_MASK;
- autoc |= IXGBE_AUTOC_LMS_10G_SERIAL;
- }
- } else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) &&
- (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
- /* Switch from 10G SFI to 1G SFI if requested */
- if ((speed == IXGBE_LINK_SPEED_1GB_FULL) &&
- (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) {
- autoc &= ~IXGBE_AUTOC_LMS_MASK;
- if (autoneg)
- autoc |= IXGBE_AUTOC_LMS_1G_AN;
- else
- autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN;
- }
- }
-
- if (autoc != start_autoc) {
- /* Restart link */
- autoc |= IXGBE_AUTOC_AN_RESTART;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
-
- /* Only poll for autoneg to complete if specified to do so */
- if (autoneg_wait_to_complete) {
- if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
- link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
- link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
- for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
- links_reg =
- IXGBE_READ_REG(hw, IXGBE_LINKS);
- if (links_reg & IXGBE_LINKS_KX_AN_COMP)
- break;
- msleep(100);
- }
- if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
- status =
- IXGBE_ERR_AUTONEG_NOT_COMPLETE;
- hw_dbg(hw, "Autoneg did not complete.\n");
- }
- }
- }
-
-		/* Add delay to filter out noise during initial link setup */
- msleep(50);
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true if waiting is needed to complete
- *
- * Restarts link on PHY and MAC based on settings passed in.
- **/
-static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete)
-{
- s32 status;
-
- /* Setup the PHY according to input speed */
- status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
- autoneg_wait_to_complete);
- /* Set up MAC */
- ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete);
-
- return status;
-}
-
-/**
- * ixgbe_reset_hw_82599 - Perform hardware reset
- * @hw: pointer to hardware structure
- *
- * Resets the hardware by resetting the transmit and receive units, masks
- * and clears all interrupts, perform a PHY reset, and perform a link (MAC)
- * reset.
- **/
-s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
-{
-// ixgbe_link_speed link_speed;
- s32 status = 0;
-// u32 ctrl, i, autoc, autoc2;
-// bool link_up = false;
-
-#if 0
- /* Call adapter stop to disable tx/rx and clear interrupts */
- status = hw->mac.ops.stop_adapter(hw);
- if (status != 0)
- goto reset_hw_out;
-
- /* flush pending Tx transactions */
- ixgbe_clear_tx_pending(hw);
-
- /* PHY ops must be identified and initialized prior to reset */
-
- /* Identify PHY and related function pointers */
- status = hw->phy.ops.init(hw);
-
- if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
- goto reset_hw_out;
-
- /* Setup SFP module if there is one present. */
- if (hw->phy.sfp_setup_needed) {
- status = hw->mac.ops.setup_sfp(hw);
- hw->phy.sfp_setup_needed = false;
- }
-
- if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
- goto reset_hw_out;
-
- /* Reset PHY */
- if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
- hw->phy.ops.reset(hw);
-
-mac_reset_top:
- /*
- * Issue global reset to the MAC. Needs to be SW reset if link is up.
- * If link reset is used when link is up, it might reset the PHY when
- * mng is using it. If link is down or the flag to force full link
- * reset is set, then perform link reset.
- */
- ctrl = IXGBE_CTRL_LNK_RST;
- if (!hw->force_full_reset) {
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
- if (link_up)
- ctrl = IXGBE_CTRL_RST;
- }
-
- ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
- IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Poll for reset bit to self-clear indicating reset is complete */
- for (i = 0; i < 10; i++) {
- udelay(1);
- ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
- if (!(ctrl & IXGBE_CTRL_RST_MASK))
- break;
- }
-
- if (ctrl & IXGBE_CTRL_RST_MASK) {
- status = IXGBE_ERR_RESET_FAILED;
- hw_dbg(hw, "Reset polling failed to complete.\n");
- }
-
- msleep(50);
-
- /*
- * Double resets are required for recovery from certain error
- * conditions. Between resets, it is necessary to stall to allow time
- * for any pending HW events to complete.
- */
- if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
- hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
- goto mac_reset_top;
- }
-
- /*
- * Store the original AUTOC/AUTOC2 values if they have not been
- * stored off yet. Otherwise restore the stored original
- * values since the reset operation sets back to defaults.
- */
- autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
- if (hw->mac.orig_link_settings_stored == false) {
- hw->mac.orig_autoc = autoc;
- hw->mac.orig_autoc2 = autoc2;
- hw->mac.orig_link_settings_stored = true;
- } else {
- if (autoc != hw->mac.orig_autoc)
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
- IXGBE_AUTOC_AN_RESTART));
-
- if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
- (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
- autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
- autoc2 |= (hw->mac.orig_autoc2 &
- IXGBE_AUTOC2_UPPER_MASK);
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
- }
- }
-#endif
-
- /* Store the permanent mac address */
- hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
-
- /*
- * Store MAC address from RAR0, clear receive address registers, and
- * clear the multicast table. Also reset num_rar_entries to 128,
- * since we modify this value when programming the SAN MAC address.
- */
- hw->mac.num_rar_entries = 128;
- hw->mac.ops.init_rx_addrs(hw);
-
- /* Store the permanent SAN mac address */
- hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
-
- /* Add the SAN MAC address to the RAR only if it's a valid address */
- if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
- hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
- hw->mac.san_addr, 0, IXGBE_RAH_AV);
-
- /* Save the SAN MAC RAR index */
- hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
-
- /* Reserve the last RAR for the SAN MAC address */
- hw->mac.num_rar_entries--;
- }
-
- /* Store the alternative WWNN/WWPN prefix */
- hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
- &hw->mac.wwpn_prefix);
-
-//reset_hw_out:
- return status;
-}
-
-/**
- * ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables.
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
-{
- int i;
- u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
- fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
-
- /*
- * Before starting reinitialization process,
- * FDIRCMD.CMD must be zero.
- */
- for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) {
- if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
- IXGBE_FDIRCMD_CMD_MASK))
- break;
- udelay(10);
- }
- if (i >= IXGBE_FDIRCMD_CMD_POLL) {
- hw_dbg(hw, "Flow Director previous command isn't complete, "
- "aborting table re-initialization.\n");
- return IXGBE_ERR_FDIR_REINIT_FAILED;
- }
-
- IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0);
- IXGBE_WRITE_FLUSH(hw);
- /*
-	 * The 82599 adapter's Flow Director init flow cannot be restarted.
-	 * Work around the 82599 silicon errata by performing the following
-	 * steps before re-writing the FDIRCTRL control register with the
-	 * same value:
- * - write 1 to bit 8 of FDIRCMD register &
- * - write 0 to bit 8 of FDIRCMD register
- */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
- (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
- IXGBE_FDIRCMD_CLEARHT));
- IXGBE_WRITE_FLUSH(hw);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
- (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
- ~IXGBE_FDIRCMD_CLEARHT));
- IXGBE_WRITE_FLUSH(hw);
- /*
- * Clear FDIR Hash register to clear any leftover hashes
- * waiting to be programmed.
- */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00);
- IXGBE_WRITE_FLUSH(hw);
-
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Poll init-done after we write FDIRCTRL register */
- for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
- if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
- IXGBE_FDIRCTRL_INIT_DONE)
- break;
- udelay(10);
- }
- if (i >= IXGBE_FDIR_INIT_DONE_POLL) {
- hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
- return IXGBE_ERR_FDIR_REINIT_FAILED;
- }
-
- /* Clear FDIR statistics registers (read to clear) */
- IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT);
- IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT);
- IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
- IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
- IXGBE_READ_REG(hw, IXGBE_FDIRLEN);
-
- return 0;
-}
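The re-init flow above relies on a pattern that recurs throughout this file: poll a status bit a bounded number of times, delaying between reads, and report a failure if the bit never reaches the expected state. A self-contained sketch of that pattern, with a simulated register standing in for FDIRCTRL (the bit value and poll budget are made up):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t fake_reg;
	static unsigned int reads;

	/* Pretend the hardware sets the "init done" bit after a few reads. */
	static uint32_t read_reg(void)
	{
		if (++reads == 3)
			fake_reg |= 0x8;
		return fake_reg;
	}

	/* Poll until the bit is set or the poll budget is exhausted. */
	static bool poll_bit_set(uint32_t bit, unsigned int max_polls)
	{
		unsigned int i;

		for (i = 0; i < max_polls; i++) {
			if (read_reg() & bit)
				return true;
			/* a real driver would udelay()/msleep() here */
		}
		return false;
	}

	int main(void)
	{
		printf("init done: %s\n", poll_bit_set(0x8, 10) ? "yes" : "no");
		return 0;
	}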
-
-/**
- * ixgbe_fdir_enable_82599 - Initialize Flow Director control registers
- * @hw: pointer to hardware structure
- * @fdirctrl: value to write to flow director control register
- **/
-static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
-{
- int i;
-
- /* Prime the keys for hashing */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
-
- /*
- * Poll init-done after we write the register. Estimated times:
- * 10G: PBALLOC = 11b, timing is 60us
- * 1G: PBALLOC = 11b, timing is 600us
- * 100M: PBALLOC = 11b, timing is 6ms
- *
-	 * Multiply these timings by 4 if under full Rx load
- *
- * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
- * 1 msec per poll time. If we're at line rate and drop to 100M, then
- * this might not finish in our poll time, but we can live with that
- * for now.
- */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
- IXGBE_WRITE_FLUSH(hw);
- for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
- if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
- IXGBE_FDIRCTRL_INIT_DONE)
- break;
- msleep(1);
- }
-
- if (i >= IXGBE_FDIR_INIT_DONE_POLL)
- hw_dbg(hw, "Flow Director poll time exceeded!\n");
-}
-
-/**
- * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
- * @hw: pointer to hardware structure
- * @fdirctrl: value to write to flow director control register, initially
- * contains just the value of the Rx packet buffer allocation
- **/
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
-{
- /*
- * Continue setup of fdirctrl register bits:
- * Move the flexible bytes to use the ethertype - shift 6 words
- * Set the maximum length per hash bucket to 0xA filters
- * Send interrupt when 64 filters are left
- */
- fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
- (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
- (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
-
- /* write hashes and fdirctrl register, poll for completion */
- ixgbe_fdir_enable_82599(hw, fdirctrl);
-
- return 0;
-}
-
-/**
- * ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
- * @hw: pointer to hardware structure
- * @fdirctrl: value to write to flow director control register, initially
- * contains just the value of the Rx packet buffer allocation
- **/
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
-{
- /*
- * Continue setup of fdirctrl register bits:
- * Turn perfect match filtering on
- * Report hash in RSS field of Rx wb descriptor
- * Initialize the drop queue
- * Move the flexible bytes to use the ethertype - shift 6 words
- * Set the maximum length per hash bucket to 0xA filters
- * Send interrupt when 64 (0x4 * 16) filters are left
- */
- fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
- IXGBE_FDIRCTRL_REPORT_STATUS |
- (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
- (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
- (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
- (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
-
- /* write hashes and fdirctrl register, poll for completion */
- ixgbe_fdir_enable_82599(hw, fdirctrl);
-
- return 0;
-}
-
-/*
- * These defines allow us to quickly generate all of the necessary instructions
- * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
- * for values 0 through 15
- */
-#define IXGBE_ATR_COMMON_HASH_KEY \
- (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
-#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
-do { \
- u32 n = (_n); \
- if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
- common_hash ^= lo_hash_dword >> n; \
- else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
- bucket_hash ^= lo_hash_dword >> n; \
- else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
- sig_hash ^= lo_hash_dword << (16 - n); \
- if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
- common_hash ^= hi_hash_dword >> n; \
- else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
- bucket_hash ^= hi_hash_dword >> n; \
- else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
- sig_hash ^= hi_hash_dword << (16 - n); \
-} while (0);
-
-/**
- * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
- * @input: unique input dword
- * @common: compressed common input dword
- *
- * This function contains several optimizations, such as unrolling all of
- * the loops, letting the compiler work out the conditional ifs since the
- * keys are static defines, and computing two keys at once since the hashed
- * dword stream will be the same for both keys.
- **/
-u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
- union ixgbe_atr_hash_dword common)
-{
- u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
- u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
-
-	/* record the flow_vm_vlan bits as they are a key part of the hash */
- flow_vm_vlan = IXGBE_NTOHL(input.dword);
-
- /* generate common hash dword */
- hi_hash_dword = IXGBE_NTOHL(common.dword);
-
- /* low dword is word swapped version of common */
- lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
-
- /* apply flow ID/VM pool/VLAN ID bits to hash words */
- hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
-
- /* Process bits 0 and 16 */
- IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
-
- /*
-	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword; we had to
-	 * delay this because bit 0 of the stream should not be processed,
-	 * so we do not add the VLAN until after bit 0 has been processed
- */
- lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
-
-	/* Process the remaining 30 bits of the key */
- IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
- IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
-
- /* combine common_hash result with signature and bucket hashes */
- bucket_hash ^= common_hash;
- bucket_hash &= IXGBE_ATR_HASH_MASK;
-
- sig_hash ^= common_hash << 16;
- sig_hash &= IXGBE_ATR_HASH_MASK << 16;
-
- /* return completed signature hash */
- return sig_hash ^ bucket_hash;
-}
-
-/**
- * ixgbe_fdir_add_signature_filter_82599 - Adds a signature hash filter
- * @hw: pointer to hardware structure
- * @input: unique input dword
- * @common: compressed common input dword
- * @queue: queue index to direct traffic to
- **/
-s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_hash_dword input,
- union ixgbe_atr_hash_dword common,
- u8 queue)
-{
- u64 fdirhashcmd;
- u32 fdircmd;
-
- /*
- * Get the flow_type in order to program FDIRCMD properly
- * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
- */
- switch (input.formatted.flow_type) {
- case IXGBE_ATR_FLOW_TYPE_TCPV4:
- case IXGBE_ATR_FLOW_TYPE_UDPV4:
- case IXGBE_ATR_FLOW_TYPE_SCTPV4:
- case IXGBE_ATR_FLOW_TYPE_TCPV6:
- case IXGBE_ATR_FLOW_TYPE_UDPV6:
- case IXGBE_ATR_FLOW_TYPE_SCTPV6:
- break;
- default:
- hw_dbg(hw, " Error on flow type input\n");
- return IXGBE_ERR_CONFIG;
- }
-
- /* configure FDIRCMD register */
- fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
- IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
- fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
- fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
-
- /*
-	 * The lower 32 bits of fdirhashcmd are for FDIRHASH and the upper
-	 * 32 bits are for FDIRCMD. Then do a 64-bit register write from
-	 * FDIRHASH.
- */
- fdirhashcmd = (u64)fdircmd << 32;
- fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
- IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
-
- hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
-
- return 0;
-}
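As the comment above describes, FDIRHASH and FDIRCMD are programmed with a single 64-bit write: the hash goes in the low dword and the command in the high dword. A tiny standalone sketch of that packing (all values are fabricated for the example):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t fdircmd  = 0x00008001u;  /* made-up command bits */
		uint32_t sig_hash = 0x12345678u;  /* made-up hash value   */
		uint64_t fdirhashcmd = ((uint64_t)fdircmd << 32) | sig_hash;

		assert((uint32_t)fdirhashcmd == sig_hash);         /* low dword: FDIRHASH */
		assert((uint32_t)(fdirhashcmd >> 32) == fdircmd);  /* high dword: FDIRCMD */
		return 0;
	}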
-
-#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
-do { \
- u32 n = (_n); \
- if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
- bucket_hash ^= lo_hash_dword >> n; \
- if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
- bucket_hash ^= hi_hash_dword >> n; \
-} while (0);
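Invoking the iteration macro for n = 0 through 15, as ixgbe_atr_compute_perfect_hash_82599() below does, unrolls the small loop sketched here; the key, the input dwords and the 15-bit truncation are made-up example values, only the folding technique matches the driver:

	#include <stdint.h>
	#include <stdio.h>

	/* For each key bit n (0..15), XOR in the low dword shifted right by n;
	 * for key bit n + 16, XOR in the high dword shifted right by n. */
	static uint16_t fold_hash(uint32_t key, uint32_t lo_dword, uint32_t hi_dword)
	{
		uint32_t hash = 0;
		int n;

		for (n = 0; n < 16; n++) {
			if (key & (1u << n))
				hash ^= lo_dword >> n;
			if (key & (1u << (n + 16)))
				hash ^= hi_dword >> n;
		}
		return (uint16_t)(hash & 0x7FFF);  /* truncation just for display */
	}

	int main(void)
	{
		printf("hash = 0x%04x\n",
		       fold_hash(0x1D2E3C4Fu, 0xAABBCCDDu, 0x11223344u));
		return 0;
	}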
-
-/**
- * ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
- * @atr_input: input bitstream to compute the hash on
- * @input_mask: mask for the input bitstream
- *
- * This function serves two main purposes. First, it applies the input_mask
- * to the atr_input, resulting in a cleaned-up atr_input data stream.
- * Second, it computes the hash and stores it in the bkt_hash field at
- * the end of the input byte stream. This way it will be available for
- * future use without needing to recompute the hash.
- **/
-void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
- union ixgbe_atr_input *input_mask)
-{
-
- u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
- u32 bucket_hash = 0;
-
- /* Apply masks to input data */
- input->dword_stream[0] &= input_mask->dword_stream[0];
- input->dword_stream[1] &= input_mask->dword_stream[1];
- input->dword_stream[2] &= input_mask->dword_stream[2];
- input->dword_stream[3] &= input_mask->dword_stream[3];
- input->dword_stream[4] &= input_mask->dword_stream[4];
- input->dword_stream[5] &= input_mask->dword_stream[5];
- input->dword_stream[6] &= input_mask->dword_stream[6];
- input->dword_stream[7] &= input_mask->dword_stream[7];
- input->dword_stream[8] &= input_mask->dword_stream[8];
- input->dword_stream[9] &= input_mask->dword_stream[9];
- input->dword_stream[10] &= input_mask->dword_stream[10];
-
-	/* record the flow_vm_vlan bits as they are a key part of the hash */
- flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]);
-
- /* generate common hash dword */
- hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^
- input->dword_stream[2] ^
- input->dword_stream[3] ^
- input->dword_stream[4] ^
- input->dword_stream[5] ^
- input->dword_stream[6] ^
- input->dword_stream[7] ^
- input->dword_stream[8] ^
- input->dword_stream[9] ^
- input->dword_stream[10]);
-
- /* low dword is word swapped version of common */
- lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
-
- /* apply flow ID/VM pool/VLAN ID bits to hash words */
- hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
-
- /* Process bits 0 and 16 */
- IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
-
- /*
-	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword; we had to
-	 * delay this because bit 0 of the stream should not be processed,
-	 * so we do not add the VLAN until after bit 0 has been processed
- */
- lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
-
-	/* Process the remaining 30 bits of the key */
- IXGBE_COMPUTE_BKT_HASH_ITERATION(1);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(2);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(3);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(4);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(5);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(6);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(7);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(8);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(9);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(10);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(11);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(12);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(13);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(14);
- IXGBE_COMPUTE_BKT_HASH_ITERATION(15);
-
- /*
- * Limit hash to 13 bits since max bucket count is 8K.
- * Store result at the end of the input stream.
- */
- input->formatted.bkt_hash = bucket_hash & 0x1FFF;
-}
-
-/**
- * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
- * @input_mask: mask to be bit swapped
- *
- * The source and destination port masks for flow director are bit swapped
- * in that bit 15 affects bit 0, bit 14 affects bit 1, bit 13 affects bit 2,
- * and so on. In order to generate a correctly swapped value we need to bit
- * swap the mask, and that is what this function accomplishes.
- **/
-static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
-{
- u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port);
- mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
- mask |= IXGBE_NTOHS(input_mask->formatted.src_port);
- mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
- mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
- mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
- return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8);
-}
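The function above reverses each 16-bit port mask with the classic mask-and-shift sequence: swap adjacent bits, then bit pairs, then nibbles, then bytes. A standalone sketch with a quick sanity check (the test value is arbitrary):

	#include <assert.h>
	#include <stdint.h>

	/* Reverse the bit order of each 16-bit half of a 32-bit word. */
	static uint32_t bitrev16x2(uint32_t m)
	{
		m = ((m & 0x55555555u) << 1) | ((m & 0xAAAAAAAAu) >> 1);
		m = ((m & 0x33333333u) << 2) | ((m & 0xCCCCCCCCu) >> 2);
		m = ((m & 0x0F0F0F0Fu) << 4) | ((m & 0xF0F0F0F0u) >> 4);
		return ((m & 0x00FF00FFu) << 8) | ((m & 0xFF00FF00u) >> 8);
	}

	int main(void)
	{
		/* bit 15 of each half lands in bit 0 of that half, and vice versa */
		assert(bitrev16x2(0x80000001u) == 0x00018000u);
		return 0;
	}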
-
-/*
- * These macros address the fact that we have registers that are either
- * fully or partially big-endian. As a result, on big-endian systems we
- * end up byte swapping the value to little-endian before it is byte
- * swapped again and written to the hardware in its original big-endian
- * format.
- */
-#define IXGBE_STORE_AS_BE32(_value) \
- (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
- (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
-
-#define IXGBE_WRITE_REG_BE32(a, reg, value) \
- IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value)))
-
-#define IXGBE_STORE_AS_BE16(_value) \
- IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8))
-
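A quick, self-contained check of the byte swap that IXGBE_STORE_AS_BE32 performs (the input value is arbitrary):

	#include <assert.h>
	#include <stdint.h>

	/* Same byte reordering as the macro above, written as a function. */
	static uint32_t store_as_be32(uint32_t v)
	{
		return (v >> 24) | ((v & 0x00FF0000u) >> 8) |
		       ((v & 0x0000FF00u) << 8) | (v << 24);
	}

	int main(void)
	{
		assert(store_as_be32(0x11223344u) == 0x44332211u);
		return 0;
	}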
-s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input_mask)
-{
- /* mask IPv6 since it is currently not supported */
- u32 fdirm = IXGBE_FDIRM_DIPv6;
- u32 fdirtcpm;
-
- /*
- * Program the relevant mask registers. If src/dst_port or src/dst_addr
- * are zero, then assume a full mask for that field. Also assume that
- * a VLAN of 0 is unspecified, so mask that out as well. L4type
- * cannot be masked out in this implementation.
- *
- * This also assumes IPv4 only. IPv6 masking isn't supported at this
- * point in time.
- */
-
- /* verify bucket hash is cleared on hash generation */
- if (input_mask->formatted.bkt_hash)
- hw_dbg(hw, " bucket hash should always be 0 in mask\n");
-
- /* Program FDIRM and verify partial masks */
- switch (input_mask->formatted.vm_pool & 0x7F) {
- case 0x0:
- fdirm |= IXGBE_FDIRM_POOL;
- case 0x7F:
- break;
- default:
- hw_dbg(hw, " Error on vm pool mask\n");
- return IXGBE_ERR_CONFIG;
- }
-
- switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
- case 0x0:
- fdirm |= IXGBE_FDIRM_L4P;
- if (input_mask->formatted.dst_port ||
- input_mask->formatted.src_port) {
- hw_dbg(hw, " Error on src/dst port mask\n");
- return IXGBE_ERR_CONFIG;
- }
- case IXGBE_ATR_L4TYPE_MASK:
- break;
- default:
- hw_dbg(hw, " Error on flow type mask\n");
- return IXGBE_ERR_CONFIG;
- }
-
- switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) {
- case 0x0000:
- /* mask VLAN ID, fall through to mask VLAN priority */
- fdirm |= IXGBE_FDIRM_VLANID;
- case 0x0FFF:
- /* mask VLAN priority */
- fdirm |= IXGBE_FDIRM_VLANP;
- break;
- case 0xE000:
- /* mask VLAN ID only, fall through */
- fdirm |= IXGBE_FDIRM_VLANID;
- case 0xEFFF:
- /* no VLAN fields masked */
- break;
- default:
- hw_dbg(hw, " Error on VLAN mask\n");
- return IXGBE_ERR_CONFIG;
- }
-
- switch (input_mask->formatted.flex_bytes & 0xFFFF) {
- case 0x0000:
- /* Mask Flex Bytes, fall through */
- fdirm |= IXGBE_FDIRM_FLEX;
- case 0xFFFF:
- break;
- default:
- hw_dbg(hw, " Error on flexible byte mask\n");
- return IXGBE_ERR_CONFIG;
- }
-
- /* Now mask VM pool and destination IPv6 - bits 5 and 2 */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
-
- /* store the TCP/UDP port masks, bit reversed from port layout */
- fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
-
- /* write both the same so that UDP and TCP use the same mask */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
-
-	/* store source and destination IP masks (big-endian) */
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
- ~input_mask->formatted.src_ip[0]);
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
- ~input_mask->formatted.dst_ip[0]);
-
- return 0;
-}
-
-s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- u16 soft_id, u8 queue)
-{
- u32 fdirport, fdirvlan, fdirhash, fdircmd;
-
- /* currently IPv6 is not supported, must be programmed with 0 */
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
- input->formatted.src_ip[0]);
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
- input->formatted.src_ip[1]);
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
- input->formatted.src_ip[2]);
-
- /* record the source address (big-endian) */
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
-
- /* record the first 32 bits of the destination address (big-endian) */
- IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
-
-	/* record source and destination port (little-endian) */
- fdirport = IXGBE_NTOHS(input->formatted.dst_port);
- fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
- fdirport |= IXGBE_NTOHS(input->formatted.src_port);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
-
-	/* record vlan (little-endian) and flex_bytes (big-endian) */
- fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
- fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
- fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
-
- /* configure FDIRHASH register */
- fdirhash = input->formatted.bkt_hash;
- fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
-
- /*
-	 * flush all previous writes to make certain that the registers are
-	 * programmed prior to issuing the command
- */
- IXGBE_WRITE_FLUSH(hw);
-
- /* configure FDIRCMD register */
- fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
- IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
- if (queue == IXGBE_FDIR_DROP_QUEUE)
- fdircmd |= IXGBE_FDIRCMD_DROP;
- fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
- fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
- fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
-
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
-
- return 0;
-}
-
-s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- u16 soft_id)
-{
- u32 fdirhash;
- u32 fdircmd = 0;
- u32 retry_count;
- s32 err = 0;
-
- /* configure FDIRHASH register */
- fdirhash = input->formatted.bkt_hash;
- fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
-
- /* flush hash to HW */
- IXGBE_WRITE_FLUSH(hw);
-
- /* Query if filter is present */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
-
- for (retry_count = 10; retry_count; retry_count--) {
- /* allow 10us for query to process */
- udelay(10);
- /* verify query completed successfully */
- fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
- if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK))
- break;
- }
-
- if (!retry_count)
- err = IXGBE_ERR_FDIR_REINIT_FAILED;
-
- /* if filter exists in hardware then remove it */
- if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
- IXGBE_WRITE_FLUSH(hw);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
- IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
- }
-
- return err;
-}
-
-/**
- * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
- * @hw: pointer to hardware structure
- * @input: input bitstream
- * @input_mask: mask for the input bitstream
- * @soft_id: software index for the filters
- * @queue: queue index to direct traffic to
- *
- * Note that the caller of this function must take a lock before calling,
- * since the hardware writes must be protected from one another.
- **/
-s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- union ixgbe_atr_input *input_mask,
- u16 soft_id, u8 queue)
-{
- s32 err = IXGBE_ERR_CONFIG;
-
- /*
- * Check flow_type formatting, and bail out before we touch the hardware
- * if there's a configuration issue
- */
- switch (input->formatted.flow_type) {
- case IXGBE_ATR_FLOW_TYPE_IPV4:
- input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK;
- if (input->formatted.dst_port || input->formatted.src_port) {
- hw_dbg(hw, " Error on src/dst port\n");
- return IXGBE_ERR_CONFIG;
- }
- break;
- case IXGBE_ATR_FLOW_TYPE_SCTPV4:
- if (input->formatted.dst_port || input->formatted.src_port) {
- hw_dbg(hw, " Error on src/dst port\n");
- return IXGBE_ERR_CONFIG;
- }
- case IXGBE_ATR_FLOW_TYPE_TCPV4:
- case IXGBE_ATR_FLOW_TYPE_UDPV4:
- input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
- IXGBE_ATR_L4TYPE_MASK;
- break;
- default:
- hw_dbg(hw, " Error on flow type input\n");
- return err;
- }
-
- /* program input mask into the HW */
- err = ixgbe_fdir_set_input_mask_82599(hw, input_mask);
- if (err)
- return err;
-
- /* apply mask and compute/store hash */
- ixgbe_atr_compute_perfect_hash_82599(input, input_mask);
-
- /* program filters to filter memory */
- return ixgbe_fdir_write_perfect_filter_82599(hw, input,
- soft_id, queue);
-}
-
-/**
- * ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
- * @hw: pointer to hardware structure
- * @reg: analog register to read
- * @val: read value
- *
- * Performs read operation to Omer analog register specified.
- **/
-s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
-{
- u32 core_ctl;
-
- IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD |
- (reg << 8));
- IXGBE_WRITE_FLUSH(hw);
- udelay(10);
- core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL);
- *val = (u8)core_ctl;
-
- return 0;
-}
-
-/**
- * ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register
- * @hw: pointer to hardware structure
- * @reg: analog register to write
- * @val: value to write
- *
- * Performs write operation to Omer analog register specified.
- **/
-s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
-{
- u32 core_ctl;
-
- core_ctl = (reg << 8) | val;
- IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl);
- IXGBE_WRITE_FLUSH(hw);
- udelay(10);
-
- return 0;
-}
-
-/**
- * ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx
- * @hw: pointer to hardware structure
- *
- * Starts the hardware using the generic start_hw function and then the
- * generation-2 start_hw function, and finally performs any
- * revision-specific operations.
- **/
-s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
-
- ret_val = ixgbe_start_hw_generic(hw);
- if (ret_val != 0)
- goto out;
-
- ret_val = ixgbe_start_hw_gen2(hw);
- if (ret_val != 0)
- goto out;
-
- /* We need to run link autotry after the driver loads */
- hw->mac.autotry_restart = true;
-
- if (ret_val == 0)
- ret_val = ixgbe_verify_fw_version_82599(hw);
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_identify_phy_82599 - Get physical layer module
- * @hw: pointer to hardware structure
- *
- * Determines the physical layer module found on the current adapter.
- * If PHY already detected, maintains current PHY type in hw struct,
- * otherwise executes the PHY detection routine.
- **/
-s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
-
- /* Detect PHY if not unknown - returns success if already detected. */
- status = ixgbe_identify_phy_generic(hw);
- if (status != 0) {
- /* 82599 10GBASE-T requires an external PHY */
- if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper)
- goto out;
- else
- status = ixgbe_identify_module_generic(hw);
- }
-
- /* Set PHY type none if no PHY detected */
- if (hw->phy.type == ixgbe_phy_unknown) {
- hw->phy.type = ixgbe_phy_none;
- status = 0;
- }
-
- /* Return error if SFP module has been detected but is not supported */
- if (hw->phy.type == ixgbe_phy_sfp_unsupported)
- status = IXGBE_ERR_SFP_NOT_SUPPORTED;
-
-out:
- return status;
-}
-
-/**
- * ixgbe_get_supported_physical_layer_82599 - Returns physical layer type
- * @hw: pointer to hardware structure
- *
- * Determines physical layer capabilities of the current configuration.
- **/
-u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw)
-{
- u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
- u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
- u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
- u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
- u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
- u16 ext_ability = 0;
- u8 comp_codes_10g = 0;
- u8 comp_codes_1g = 0;
-
- hw->phy.ops.identify(hw);
-
- switch (hw->phy.type) {
- case ixgbe_phy_tn:
- case ixgbe_phy_cu_unknown:
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
- if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
- goto out;
- default:
- break;
- }
-
- switch (autoc & IXGBE_AUTOC_LMS_MASK) {
- case IXGBE_AUTOC_LMS_1G_AN:
- case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
- if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) {
- physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX |
- IXGBE_PHYSICAL_LAYER_1000BASE_BX;
- goto out;
- } else
- /* SFI mode so read SFP module */
- goto sfp_check;
- break;
- case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
- if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
- else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
- else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI;
- goto out;
- break;
- case IXGBE_AUTOC_LMS_10G_SERIAL:
- if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) {
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR;
- goto out;
- } else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)
- goto sfp_check;
- break;
- case IXGBE_AUTOC_LMS_KX4_KX_KR:
- case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
- if (autoc & IXGBE_AUTOC_KX_SUPP)
- physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
- if (autoc & IXGBE_AUTOC_KX4_SUPP)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
- if (autoc & IXGBE_AUTOC_KR_SUPP)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR;
- goto out;
- break;
- default:
- goto out;
- break;
- }
-
-sfp_check:
-	/* SFP check must be done last since DA modules are sometimes used to
-	 * test KR mode - we need to identify KR mode correctly before the SFP
-	 * module check. Call identify_sfp because the pluggable module may
-	 * have changed. */
- hw->phy.ops.identify_sfp(hw);
- if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
- goto out;
-
- switch (hw->phy.type) {
- case ixgbe_phy_sfp_passive_tyco:
- case ixgbe_phy_sfp_passive_unknown:
- physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
- break;
- case ixgbe_phy_sfp_ftl_active:
- case ixgbe_phy_sfp_active_unknown:
- physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA;
- break;
- case ixgbe_phy_sfp_avago:
- case ixgbe_phy_sfp_ftl:
- case ixgbe_phy_sfp_intel:
- case ixgbe_phy_sfp_unknown:
- hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g);
- hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g);
- if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
- else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
- physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
- else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE)
- physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T;
- else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE)
- physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX;
- break;
- default:
- break;
- }
-
-out:
- return physical_layer;
-}
-
-/**
- * ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599
- * @hw: pointer to hardware structure
- * @regval: register value to write to RXCTRL
- *
- * Enables the Rx DMA unit for 82599
- **/
-s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
-{
-
- /*
- * Workaround for 82599 silicon errata when enabling the Rx datapath.
- * If traffic is incoming before we enable the Rx unit, it could hang
- * the Rx DMA unit. Therefore, make sure the security engine is
- * completely disabled prior to enabling the Rx unit.
- */
-
- hw->mac.ops.disable_sec_rx_path(hw);
-
- IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
-
- hw->mac.ops.enable_sec_rx_path(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_verify_fw_version_82599 - verify fw version for 82599
- * @hw: pointer to hardware structure
- *
- * Verifies that the installed firmware version is 0.6 or higher
- * for SFI devices. All 82599 SFI devices should have version 0.6 or higher.
- *
- * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or
- * if the FW version is not supported.
- **/
-static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_EEPROM_VERSION;
- u16 fw_offset, fw_ptp_cfg_offset;
- u16 fw_version = 0;
-
- /* firmware check is only necessary for SFI devices */
- if (hw->phy.media_type != ixgbe_media_type_fiber) {
- status = 0;
- goto fw_version_out;
- }
-
- /* get the offset to the Firmware Module block */
- hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
-
- if ((fw_offset == 0) || (fw_offset == 0xFFFF))
- goto fw_version_out;
-
- /* get the offset to the Pass Through Patch Configuration block */
- hw->eeprom.ops.read(hw, (fw_offset +
- IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR),
- &fw_ptp_cfg_offset);
-
- if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF))
- goto fw_version_out;
-
- /* get the firmware version */
- hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset +
- IXGBE_FW_PATCH_VERSION_4), &fw_version);
-
- if (fw_version > 0x5)
- status = 0;
-
-fw_version_out:
- return status;
-}
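This check and the LESM check below walk the EEPROM the same way: read a pointer word, treat 0x0000 or 0xFFFF as "block not present", and follow it to the next block before reading the word of interest. A self-contained sketch of that pointer chasing (the eeprom[] image and all offsets are fabricated):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static const uint16_t eeprom[16] = {
		[0x0F] = 0x0003,   /* "firmware module" pointer     */
		[0x03] = 0x0005,   /* "patch configuration" pointer */
		[0x09] = 0x0007,   /* "firmware version" word       */
	};

	static bool valid_ptr(uint16_t p)
	{
		return p != 0x0000 && p != 0xFFFF;
	}

	int main(void)
	{
		uint16_t fw_ptr, cfg_ptr;

		fw_ptr = eeprom[0x0F];
		if (!valid_ptr(fw_ptr))
			return 1;

		cfg_ptr = eeprom[fw_ptr];
		if (!valid_ptr(cfg_ptr))
			return 1;

		printf("fw version word = 0x%04x\n", eeprom[cfg_ptr + 4]);
		return 0;
	}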
-
-/**
- * ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state.
- * @hw: pointer to hardware structure
- *
- * Returns true if the LESM FW module is present and enabled. Otherwise
- * returns false. Smart Speed must be disabled if LESM FW module is enabled.
- **/
-bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
-{
- bool lesm_enabled = false;
- u16 fw_offset, fw_lesm_param_offset, fw_lesm_state;
- s32 status;
-
- /* get the offset to the Firmware Module block */
- status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
-
- if ((status != 0) ||
- (fw_offset == 0) || (fw_offset == 0xFFFF))
- goto out;
-
- /* get the offset to the LESM Parameters block */
- status = hw->eeprom.ops.read(hw, (fw_offset +
- IXGBE_FW_LESM_PARAMETERS_PTR),
- &fw_lesm_param_offset);
-
- if ((status != 0) ||
- (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF))
- goto out;
-
- /* get the lesm state word */
- status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset +
- IXGBE_FW_LESM_STATE_1),
- &fw_lesm_state);
-
- if ((status == 0) &&
- (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED))
- lesm_enabled = true;
-
-out:
- return lesm_enabled;
-}
-
-/**
- * ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using
- * fastest available method
- *
- * @hw: pointer to hardware structure
- * @offset: offset of word in EEPROM to read
- * @words: number of words
- * @data: word(s) read from the EEPROM
- *
- * Retrieves 16 bit word(s) read from EEPROM
- **/
-static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- s32 ret_val = IXGBE_ERR_CONFIG;
-
- /*
- * If EEPROM is detected and can be addressed using 14 bits,
-	 * use EERD; otherwise use bit-bang access
- */
- if ((eeprom->type == ixgbe_eeprom_spi) &&
- (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR))
- ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words,
- data);
- else
- ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset,
- words,
- data);
-
- return ret_val;
-}
-
-/**
- * ixgbe_read_eeprom_82599 - Read EEPROM word using
- * fastest available method
- *
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to read
- * @data: word read from the EEPROM
- *
- * Reads a 16 bit word from the EEPROM
- **/
-static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
- u16 offset, u16 *data)
-{
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- s32 ret_val = IXGBE_ERR_CONFIG;
-
- /*
- * If EEPROM is detected and can be addressed using 14 bits,
-	 * use EERD; otherwise use bit-bang access
- */
- if ((eeprom->type == ixgbe_eeprom_spi) &&
- (offset <= IXGBE_EERD_MAX_ADDR))
- ret_val = ixgbe_read_eerd_generic(hw, offset, data);
- else
- ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data);
-
- return ret_val;
-}
-
-/**
- * ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @data: value read
- *
- * Performs byte read operation to SFP module's EEPROM over I2C interface at
- * a specified device address.
- **/
-static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data)
-{
- u32 esdp;
- s32 status;
- s32 timeout = 200;
-
- if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
- /* Acquire I2C bus ownership. */
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- esdp |= IXGBE_ESDP_SDP0;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_FLUSH(hw);
-
- while (timeout) {
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- if (esdp & IXGBE_ESDP_SDP1)
- break;
-
- msleep(5);
- timeout--;
- }
-
- if (!timeout) {
-			hw_dbg(hw, "Driver can't access resource:"
-				   " I2C bus acquisition timed out.\n");
- status = IXGBE_ERR_I2C;
- goto release_i2c_access;
- }
- }
-
- status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data);
-
-release_i2c_access:
-
- if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
- /* Release I2C bus ownership. */
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- esdp &= ~IXGBE_ESDP_SDP0;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_FLUSH(hw);
- }
-
- return status;
-}
-
-/**
- * ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @data: value to write
- *
- * Performs byte write operation to SFP module's EEPROM over I2C interface at
- * a specified device address.
- **/
-static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data)
-{
- u32 esdp;
- s32 status;
- s32 timeout = 200;
-
- if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
- /* Acquire I2C bus ownership. */
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- esdp |= IXGBE_ESDP_SDP0;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_FLUSH(hw);
-
- while (timeout) {
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- if (esdp & IXGBE_ESDP_SDP1)
- break;
-
- msleep(5);
- timeout--;
- }
-
- if (!timeout) {
-			hw_dbg(hw, "Driver can't access resource:"
-				   " I2C bus acquisition timed out.\n");
- status = IXGBE_ERR_I2C;
- goto release_i2c_access;
- }
- }
-
- status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data);
-
-release_i2c_access:
-
- if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
- /* Release I2C bus ownership. */
- esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
- esdp &= ~IXGBE_ESDP_SDP0;
- IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
- IXGBE_WRITE_FLUSH(hw);
- }
-
- return status;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
deleted file mode 100644
index 0305ed73..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_82599_H_
-#define _IXGBE_82599_H_
-
-s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed, bool *autoneg);
-enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw);
-void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
-void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
-void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
-s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete);
-s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete);
-s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
- bool autoneg_wait_to_complete);
-s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed,
- bool autoneg, bool autoneg_wait_to_complete);
-s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw);
-void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw);
-s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw);
-s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val);
-s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val);
-s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw);
-s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw);
-s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw);
-u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw);
-s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval);
-bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
-#endif /* _IXGBE_82599_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
deleted file mode 100644
index 1be4c64f..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
+++ /dev/null
@@ -1,1142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_api.h"
-#include "ixgbe_common.h"
-
-/**
- * ixgbe_init_shared_code - Initialize the shared code
- * @hw: pointer to hardware structure
- *
- * This assigns the function pointers, MAC type and PHY code. It does not
- * touch the hardware. This function must be called prior to any other
- * function in the shared code. The ixgbe_hw structure should be memset
- * to 0 prior to calling this function. The following fields in the hw
- * structure should be filled in prior to calling this function:
- * hw_addr, back, device_id, vendor_id, subsystem_device_id,
- * subsystem_vendor_id, and revision_id.
- **/
-s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
-{
- s32 status;
-
- /*
- * Set the mac type
- */
- ixgbe_set_mac_type(hw);
-
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- status = ixgbe_init_ops_82598(hw);
- break;
- case ixgbe_mac_82599EB:
- status = ixgbe_init_ops_82599(hw);
- break;
- case ixgbe_mac_X540:
- status = ixgbe_init_ops_X540(hw);
- break;
- default:
- status = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
- break;
- }
-
- return status;
-}
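The shared-code API in this file is a thin dispatch layer: ixgbe_init_shared_code() fills in the ops tables for the detected MAC type, and the entry points below call through those tables via ixgbe_call_func(), returning IXGBE_NOT_IMPLEMENTED when a pointer was left unset. A minimal sketch of that pattern using hypothetical names:

	#include <stdio.h>

	#define NOT_IMPLEMENTED (-1)

	struct dev_ops {
		int (*reset)(void);   /* may be NULL if a MAC type lacks the op */
	};

	static int reset_82599(void) { return 0; }

	/* Guarded indirect call, analogous in spirit to ixgbe_call_func() */
	#define CALL_OP(ops, op) ((ops)->op ? (ops)->op() : NOT_IMPLEMENTED)

	int main(void)
	{
		struct dev_ops ops_82599 = { .reset = reset_82599 };
		struct dev_ops ops_stub  = { .reset = NULL };

		printf("82599 reset -> %d\n", CALL_OP(&ops_82599, reset));
		printf("stub  reset -> %d\n", CALL_OP(&ops_stub, reset));
		return 0;
	}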
-
-/**
- * ixgbe_set_mac_type - Sets MAC type
- * @hw: pointer to the HW structure
- *
- * This function sets the mac type of the adapter based on the
- * vendor ID and device ID stored in the hw structure.
- **/
-s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
-
- if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) {
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82598:
- case IXGBE_DEV_ID_82598_BX:
- case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
- case IXGBE_DEV_ID_82598AF_DUAL_PORT:
- case IXGBE_DEV_ID_82598AT:
- case IXGBE_DEV_ID_82598AT2:
- case IXGBE_DEV_ID_82598EB_CX4:
- case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
- case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
- case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
- case IXGBE_DEV_ID_82598EB_XF_LR:
- case IXGBE_DEV_ID_82598EB_SFP_LOM:
- hw->mac.type = ixgbe_mac_82598EB;
- break;
- case IXGBE_DEV_ID_82599_KX4:
- case IXGBE_DEV_ID_82599_KX4_MEZZ:
- case IXGBE_DEV_ID_82599_XAUI_LOM:
- case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
- case IXGBE_DEV_ID_82599_KR:
- case IXGBE_DEV_ID_82599_SFP:
- case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
- case IXGBE_DEV_ID_82599_SFP_FCOE:
- case IXGBE_DEV_ID_82599_SFP_EM:
- case IXGBE_DEV_ID_82599_SFP_SF2:
- case IXGBE_DEV_ID_82599_QSFP_SF_QP:
- case IXGBE_DEV_ID_82599EN_SFP:
- case IXGBE_DEV_ID_82599_CX4:
- case IXGBE_DEV_ID_82599_LS:
- case IXGBE_DEV_ID_82599_T3_LOM:
- hw->mac.type = ixgbe_mac_82599EB;
- break;
- case IXGBE_DEV_ID_X540T:
- hw->mac.type = ixgbe_mac_X540;
- break;
- default:
- ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
- break;
- }
- } else {
- ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
- }
-
- hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n",
- hw->mac.type, ret_val);
- return ret_val;
-}
-
-/**
- * ixgbe_init_hw - Initialize the hardware
- * @hw: pointer to hardware structure
- *
- * Initialize the hardware by resetting and then starting the hardware
- **/
-s32 ixgbe_init_hw(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_reset_hw - Performs a hardware reset
- * @hw: pointer to hardware structure
- *
- * Resets the hardware by resetting the transmit and receive units, masks and
- * clears all interrupts, performs a PHY reset, and performs a MAC reset
- **/
-s32 ixgbe_reset_hw(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_start_hw - Prepares hardware for Rx/Tx
- * @hw: pointer to hardware structure
- *
- * Starts the hardware by filling the bus info structure and media type,
- * clears all on chip counters, initializes receive address registers,
- * multicast table, VLAN filter table, calls routine to setup link and
- * flow control settings, and leaves transmit and receive units disabled
- * and uninitialized.
- **/
-s32 ixgbe_start_hw(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_clear_hw_cntrs - Clear hardware counters
- * @hw: pointer to hardware structure
- *
- * Clears all hardware statistics counters by reading them from the
- * hardware. Statistics counters are clear on read.
- **/
-s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_media_type - Get media type
- * @hw: pointer to hardware structure
- *
- * Returns the media type (fiber, copper, backplane)
- **/
-enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw),
- ixgbe_media_type_unknown);
-}
-
-/**
- * ixgbe_get_mac_addr - Get MAC address
- * @hw: pointer to hardware structure
- * @mac_addr: Adapter MAC address
- *
- * Reads the adapter's MAC address from the first Receive Address Register
- * (RAR0). A reset of the adapter must have been performed prior to calling
- * this function in order for the MAC address to have been loaded from the
- * EEPROM into RAR0.
- **/
-s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr,
- (hw, mac_addr), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_san_mac_addr - Get SAN MAC address
- * @hw: pointer to hardware structure
- * @san_mac_addr: SAN MAC address
- *
- * Reads the SAN MAC address from the EEPROM, if it's available. This is
- * per-port, so set_lan_id() must be called before reading the addresses.
- **/
-s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr,
- (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_san_mac_addr - Write a SAN MAC address
- * @hw: pointer to hardware structure
- * @san_mac_addr: SAN MAC address
- *
- * Writes a SAN MAC address to the EEPROM.
- **/
-s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr,
- (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_device_caps - Get additional device capabilities
- * @hw: pointer to hardware structure
- * @device_caps: the EEPROM word for device capabilities
- *
- * Reads the extra device capabilities from the EEPROM
- **/
-s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_device_caps,
- (hw, device_caps), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM
- * @hw: pointer to hardware structure
- * @wwnn_prefix: the alternative WWNN prefix
- * @wwpn_prefix: the alternative WWPN prefix
- *
- * This function will read the EEPROM from the alternative SAN MAC address
- * block to check the support for the alternative WWNN/WWPN prefix support.
- **/
-s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
- u16 *wwpn_prefix)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix,
- (hw, wwnn_prefix, wwpn_prefix),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_fcoe_boot_status - Get FCOE boot status from EEPROM
- * @hw: pointer to hardware structure
- * @bs: the fcoe boot status
- *
- * This function will read the FCOE boot status from the iSCSI FCOE block
- **/
-s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status,
- (hw, bs),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_bus_info - Set PCI bus info
- * @hw: pointer to hardware structure
- *
- * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
- **/
-s32 ixgbe_get_bus_info(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_num_of_tx_queues - Get Tx queues
- * @hw: pointer to hardware structure
- *
- * Returns the number of transmit queues for the given adapter.
- **/
-u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw)
-{
- return hw->mac.max_tx_queues;
-}
-
-/**
- * ixgbe_get_num_of_rx_queues - Get Rx queues
- * @hw: pointer to hardware structure
- *
- * Returns the number of receive queues for the given adapter.
- **/
-u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw)
-{
- return hw->mac.max_rx_queues;
-}
-
-/**
- * ixgbe_stop_adapter - Disable Rx/Tx units
- * @hw: pointer to hardware structure
- *
- * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
- * disables transmit and receive units. The adapter_stopped flag is used by
- * the shared code and drivers to determine if the adapter is in a stopped
- * state and should not touch the hardware.
- **/
-s32 ixgbe_stop_adapter(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_read_pba_string - Reads part number string from EEPROM
- * @hw: pointer to hardware structure
- * @pba_num: stores the part number string from the EEPROM
- * @pba_num_size: part number string buffer length
- *
- * Reads the part number string from the EEPROM.
- **/
-s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size)
-{
- return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size);
-}
-
-/**
- * ixgbe_identify_phy - Get PHY type
- * @hw: pointer to hardware structure
- *
- * Determines the physical layer module found on the current adapter.
- **/
-s32 ixgbe_identify_phy(struct ixgbe_hw *hw)
-{
- s32 status = 0;
-
- if (hw->phy.type == ixgbe_phy_unknown) {
- status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw),
- IXGBE_NOT_IMPLEMENTED);
- }
-
- return status;
-}
-
-/**
- * ixgbe_reset_phy - Perform a PHY reset
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_reset_phy(struct ixgbe_hw *hw)
-{
- s32 status = 0;
-
- if (hw->phy.type == ixgbe_phy_unknown) {
- if (ixgbe_identify_phy(hw) != 0)
- status = IXGBE_ERR_PHY;
- }
-
- if (status == 0) {
- status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw),
- IXGBE_NOT_IMPLEMENTED);
- }
- return status;
-}
-
-/**
- * ixgbe_get_phy_firmware_version - Get the PHY firmware version
- * @hw: pointer to hardware structure
- * @firmware_version: pointer to firmware version
- **/
-s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version)
-{
- s32 status = 0;
-
- status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version,
- (hw, firmware_version),
- IXGBE_NOT_IMPLEMENTED);
- return status;
-}
-
-/**
- * ixgbe_read_phy_reg - Read PHY register
- * @hw: pointer to hardware structure
- * @reg_addr: 32 bit address of PHY register to read
- * @phy_data: Pointer to read data from PHY register
- *
- * Reads a value from a specified PHY register
- **/
-s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
- u16 *phy_data)
-{
- if (hw->phy.id == 0)
- ixgbe_identify_phy(hw);
-
- return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr,
- device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_write_phy_reg - Write PHY register
- * @hw: pointer to hardware structure
- * @reg_addr: 32 bit PHY register to write
- * @phy_data: Data to write to the PHY register
- *
- * Writes a value to specified PHY register
- **/
-s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
- u16 phy_data)
-{
- if (hw->phy.id == 0)
- ixgbe_identify_phy(hw);
-
- return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr,
- device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_setup_phy_link - Restart PHY autoneg
- * @hw: pointer to hardware structure
- *
- * Restarts PHY autonegotiation and waits for completion.
- **/
-s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_check_phy_link - Determine link and speed status
- * @hw: pointer to hardware structure
- *
- * Reads a PHY register to determine if link is up and the current speed for
- * the PHY.
- **/
-s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *link_up)
-{
- return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed,
- link_up), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_setup_phy_link_speed - Set auto advertise
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- *
- * Sets the auto advertised capabilities
- **/
-s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete)
-{
- return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed,
- autoneg, autoneg_wait_to_complete),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_check_link - Get link and speed status
- * @hw: pointer to hardware structure
- *
- * Reads the links register to determine if link is up and the current speed
- **/
-s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *link_up, bool link_up_wait_to_complete)
-{
- return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed,
- link_up, link_up_wait_to_complete),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_disable_tx_laser - Disable Tx laser
- * @hw: pointer to hardware structure
- *
- * If the driver needs to disable the laser on SFI optics.
- **/
-void ixgbe_disable_tx_laser(struct ixgbe_hw *hw)
-{
- if (hw->mac.ops.disable_tx_laser)
- hw->mac.ops.disable_tx_laser(hw);
-}
-
-/**
- * ixgbe_enable_tx_laser - Enable Tx laser
- * @hw: pointer to hardware structure
- *
- * If the driver needs to enable the laser on SFI optics.
- **/
-void ixgbe_enable_tx_laser(struct ixgbe_hw *hw)
-{
- if (hw->mac.ops.enable_tx_laser)
- hw->mac.ops.enable_tx_laser(hw);
-}
-
-/**
- * ixgbe_flap_tx_laser - flap Tx laser to start autotry process
- * @hw: pointer to hardware structure
- *
- * When the driver changes the link speeds that it can support then
- * flap the tx laser to alert the link partner to start autotry
- * process on its end.
- **/
-void ixgbe_flap_tx_laser(struct ixgbe_hw *hw)
-{
- if (hw->mac.ops.flap_tx_laser)
- hw->mac.ops.flap_tx_laser(hw);
-}
-
-/**
- * ixgbe_setup_link - Set link speed
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- *
- * Configures link settings. Restarts the link.
- * Performs autonegotiation if needed.
- **/
-s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete)
-{
- return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed,
- autoneg, autoneg_wait_to_complete),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_link_capabilities - Returns link capabilities
- * @hw: pointer to hardware structure
- *
- * Determines the link capabilities of the current configuration.
- **/
-s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *autoneg)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw,
- speed, autoneg), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_led_on - Turn on LEDs
- * @hw: pointer to hardware structure
- * @index: led number to turn on
- *
- * Turns on the software controllable LEDs.
- **/
-s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index)
-{
- return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_led_off - Turn off LEDs
- * @hw: pointer to hardware structure
- * @index: led number to turn off
- *
- * Turns off the software controllable LEDs.
- **/
-s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index)
-{
- return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_blink_led_start - Blink LEDs
- * @hw: pointer to hardware structure
- * @index: led number to blink
- *
- * Blink LED based on index.
- **/
-s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index)
-{
- return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_blink_led_stop - Stop blinking LEDs
- * @hw: pointer to hardware structure
- *
- * Stop blinking LED based on index.
- **/
-s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index)
-{
- return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_init_eeprom_params - Initialize EEPROM parameters
- * @hw: pointer to hardware structure
- *
- * Initializes the EEPROM parameters ixgbe_eeprom_info within the
- * ixgbe_hw struct in order to set up EEPROM access.
- **/
-s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-
-/**
- * ixgbe_write_eeprom - Write word to EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be written to
- * @data: 16 bit word to be written to the EEPROM
- *
- * Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not
- * called after this function, the EEPROM will most likely contain an
- * invalid checksum.
- **/
-s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_write_eeprom_buffer - Write word(s) to EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be written to
- * @data: 16 bit word(s) to be written to the EEPROM
- * @words: number of words
- *
- * Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not
- * called after this function, the EEPROM will most likely contain an
- * invalid checksum.
- **/
-s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words,
- u16 *data)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer,
- (hw, offset, words, data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_read_eeprom - Read word from EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be read
- * @data: read 16 bit value from EEPROM
- *
- * Reads 16 bit value from EEPROM
- **/
-s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_read_eeprom_buffer - Read word(s) from EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be read
- * @data: read 16 bit word(s) from EEPROM
- * @words: number of words
- *
- * Reads 16 bit word(s) from EEPROM
- **/
-s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer,
- (hw, offset, words, data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_validate_eeprom_checksum - Validate EEPROM checksum
- * @hw: pointer to hardware structure
- * @checksum_val: calculated checksum
- *
- * Performs checksum calculation and validates the EEPROM checksum
- **/
-s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum,
- (hw, checksum_val), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_update_eeprom_checksum - Updates the EEPROM checksum
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_insert_mac_addr - Find a RAR for this mac address
- * @hw: pointer to hardware structure
- * @addr: Address to put into receive address register
- * @vmdq: VMDq pool to assign
- *
- * Puts an ethernet address into a receive address register, or
- * finds the rar that it is already in; adds to the pool list
- **/
-s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
-{
- return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr,
- (hw, addr, vmdq),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_rar - Set Rx address register
- * @hw: pointer to hardware structure
- * @index: Receive address register to write
- * @addr: Address to put into receive address register
- * @vmdq: VMDq "set"
- * @enable_addr: set flag that address is active
- *
- * Puts an ethernet address into a receive address register.
- **/
-s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
- u32 enable_addr)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq,
- enable_addr), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_clear_rar - Clear Rx address register
- * @hw: pointer to hardware structure
- * @index: Receive address register to clear
- *
- * Clears an ethernet address from a receive address register.
- **/
-s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index)
-{
- return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_vmdq - Associate a VMDq index with a receive address
- * @hw: pointer to hardware structure
- * @rar: receive address register index to associate with VMDq index
- * @vmdq: VMDq set or pool index
- **/
-s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq),
- IXGBE_NOT_IMPLEMENTED);
-
-}
-
-/**
- * ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address
- * @hw: pointer to hardware structure
- * @vmdq: VMDq default pool index
- **/
-s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac,
- (hw, vmdq), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address
- * @hw: pointer to hardware structure
- * @rar: receive address register index to disassociate with VMDq index
- * @vmdq: VMDq set or pool index
- **/
-s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_init_rx_addrs - Initializes receive address filters.
- * @hw: pointer to hardware structure
- *
- * Places the MAC address in receive address register 0 and clears the rest
- * of the receive address registers. Clears the multicast table. Assumes
- * the receiver is in reset when the routine is called.
- **/
-s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_num_rx_addrs - Returns the number of RAR entries.
- * @hw: pointer to hardware structure
- **/
-u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw)
-{
- return hw->mac.num_rar_entries;
-}
-
-/**
- * ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses
- * @hw: pointer to hardware structure
- * @addr_list: the list of new secondary (unicast) addresses
- * @addr_count: number of addresses
- * @func: iterator function to walk the unicast address list
- *
- * The given list replaces any existing list. Clears the secondary addrs from
- * receive address registers. Uses unused receive address registers for the
- * first secondary addresses, and falls back to promiscuous mode as needed.
- **/
-s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
- u32 addr_count, ixgbe_mc_addr_itr func)
-{
- return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw,
- addr_list, addr_count, func),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses
- * @hw: pointer to hardware structure
- * @mc_addr_list: the list of new multicast addresses
- * @mc_addr_count: number of addresses
- * @func: iterator function to walk the multicast address list
- *
- * The given list replaces any existing list. Clears the MC addrs from receive
- * address registers and the multicast table. Uses unused receive address
- * registers for the first multicast addresses, and hashes the rest into the
- * multicast table.
- **/
-s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count, ixgbe_mc_addr_itr func,
- bool clear)
-{
- return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw,
- mc_addr_list, mc_addr_count, func, clear),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_enable_mc - Enable multicast address in RAR
- * @hw: pointer to hardware structure
- *
- * Enables multicast address in RAR and the use of the multicast hash table.
- **/
-s32 ixgbe_enable_mc(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_disable_mc - Disable multicast address in RAR
- * @hw: pointer to hardware structure
- *
- * Disables multicast address in RAR and the use of the multicast hash table.
- **/
-s32 ixgbe_disable_mc(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_clear_vfta - Clear VLAN filter table
- * @hw: pointer to hardware structure
- *
- * Clears the VLAN filter table, and the VMDq index associated with the filter
- **/
-s32 ixgbe_clear_vfta(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_vfta - Set VLAN filter table
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- * @vind: VMDq output index that maps queue to VLAN id in VFTA
- * @vlan_on: boolean flag to turn on/off VLAN in VFTA
- *
- * Turn on/off specified VLAN in the VLAN filter table.
- **/
-s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind,
- vlan_on), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_vlvf - Set VLAN Pool Filter
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- * @vind: VMDq output index that maps queue to VLAN id in VLVFB
- * @vlan_on: boolean flag to turn on/off VLAN in VLVF
- * @vfta_changed: pointer to boolean flag which indicates whether VFTA
- * should be changed
- *
- * Turn on/off specified bit in VLVF table.
- **/
-s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
- bool *vfta_changed)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind,
- vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_fc_enable - Enable flow control
- * @hw: pointer to hardware structure
- *
- * Configures the flow control settings based on SW configuration.
- **/
-s32 ixgbe_fc_enable(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_set_fw_drv_ver - Try to send the driver version number to FW
- * @hw: pointer to hardware structure
- * @maj: driver major number to be sent to firmware
- * @min: driver minor number to be sent to firmware
- * @build: driver build number to be sent to firmware
- * @ver: driver version number to be sent to firmware
- **/
-s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
- u8 ver)
-{
- return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min,
- build, ver), IXGBE_NOT_IMPLEMENTED);
-}
-
-
-/**
- * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
- * @hw: pointer to hardware structure
- *
- * Updates the temperatures in mac.thermal_sensor_data
- **/
-s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds
- * @hw: pointer to hardware structure
- *
- * Inits the thermal sensor thresholds according to the NVM map
- **/
-s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-/**
- * ixgbe_read_analog_reg8 - Reads 8 bit analog register
- * @hw: pointer to hardware structure
- * @reg: analog register to read
- * @val: read value
- *
- * Performs read operation on the specified analog register.
- **/
-s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val)
-{
- return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg,
- val), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_write_analog_reg8 - Writes 8 bit analog register
- * @hw: pointer to hardware structure
- * @reg: analog register to write
- * @val: value to write
- *
- * Performs write operation to Atlas analog register specified.
- **/
-s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val)
-{
- return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg,
- val), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_init_uta_tables - Initializes Unicast Table Arrays.
- * @hw: pointer to hardware structure
- *
- * Initializes the Unicast Table Arrays to zero on device load. This
- * is part of the Rx init addr execution path.
- **/
-s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @data: value read
- *
- * Performs byte read operation to SFP module's EEPROM over I2C interface.
- **/
-s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
- u8 *data)
-{
- return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset,
- dev_addr, data), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_write_i2c_byte - Writes 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @data: value to write
- *
- * Performs byte write operation to SFP module's EEPROM over I2C interface
- * at a specified device address.
- **/
-s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
- u8 data)
-{
- return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset,
- dev_addr, data), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface
- * @hw: pointer to hardware structure
- * @byte_offset: EEPROM byte offset to write
- * @eeprom_data: value to write
- *
- * Performs byte write operation to SFP module's EEPROM over I2C interface.
- **/
-s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw,
- u8 byte_offset, u8 eeprom_data)
-{
- return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom,
- (hw, byte_offset, eeprom_data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface
- * @hw: pointer to hardware structure
- * @byte_offset: EEPROM byte offset to read
- * @eeprom_data: value read
- *
- * Performs byte read operation to SFP module's EEPROM over I2C interface.
- **/
-s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data)
-{
- return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom,
- (hw, byte_offset, eeprom_data),
- IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_get_supported_physical_layer - Returns physical layer type
- * @hw: pointer to hardware structure
- *
- * Determines physical layer capabilities of the current configuration.
- **/
-u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer,
- (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN);
-}
-
-/**
- * ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics
- * @hw: pointer to hardware structure
- * @regval: bitfield to write to the Rx DMA register
- *
- * Enables the Rx DMA unit of the device.
- **/
-s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval)
-{
- return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma,
- (hw, regval), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_disable_sec_rx_path - Stops the receive data path
- * @hw: pointer to hardware structure
- *
- * Stops the receive data path.
- **/
-s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path,
- (hw), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_enable_sec_rx_path - Enables the receive data path
- * @hw: pointer to hardware structure
- *
- * Enables the receive data path.
- **/
-s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw)
-{
- return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path,
- (hw), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to acquire
- *
- * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified
- * function (CSR, PHY0, PHY1, EEPROM, Flash)
- **/
-s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
-{
- return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync,
- (hw, mask), IXGBE_NOT_IMPLEMENTED);
-}
-
-/**
- * ixgbe_release_swfw_semaphore - Release SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to release
- *
- * Releases the SWFW semaphore through SW_FW_SYNC register for the specified
- * function (CSR, PHY0, PHY1, EEPROM, Flash)
- **/
-void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
-{
- if (hw->mac.ops.release_swfw_sync)
- hw->mac.ops.release_swfw_sync(hw, mask);
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
deleted file mode 100644
index 11247a0b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_API_H_
-#define _IXGBE_API_H_
-
-#include "ixgbe_type.h"
-
-s32 ixgbe_init_shared_code(struct ixgbe_hw *hw);
-
-extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw);
-extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
-extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw);
-
-s32 ixgbe_set_mac_type(struct ixgbe_hw *hw);
-s32 ixgbe_init_hw(struct ixgbe_hw *hw);
-s32 ixgbe_reset_hw(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw(struct ixgbe_hw *hw);
-s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw);
-enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw);
-s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr);
-s32 ixgbe_get_bus_info(struct ixgbe_hw *hw);
-u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw);
-u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw);
-s32 ixgbe_stop_adapter(struct ixgbe_hw *hw);
-s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size);
-
-s32 ixgbe_identify_phy(struct ixgbe_hw *hw);
-s32 ixgbe_reset_phy(struct ixgbe_hw *hw);
-s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
- u16 *phy_data);
-s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
- u16 phy_data);
-
-s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw);
-s32 ixgbe_check_phy_link(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *link_up);
-s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete);
-void ixgbe_disable_tx_laser(struct ixgbe_hw *hw);
-void ixgbe_enable_tx_laser(struct ixgbe_hw *hw);
-void ixgbe_flap_tx_laser(struct ixgbe_hw *hw);
-s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
- bool autoneg, bool autoneg_wait_to_complete);
-s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *link_up, bool link_up_wait_to_complete);
-s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *autoneg);
-s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index);
-
-s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw);
-s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data);
-s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-
-s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val);
-s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw);
-
-s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
-s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
- u32 enable_addr);
-s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq);
-s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw);
-u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw);
-s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
- u32 addr_count, ixgbe_mc_addr_itr func);
-s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count, ixgbe_mc_addr_itr func,
- bool clear);
-void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq);
-s32 ixgbe_enable_mc(struct ixgbe_hw *hw);
-s32 ixgbe_disable_mc(struct ixgbe_hw *hw);
-s32 ixgbe_clear_vfta(struct ixgbe_hw *hw);
-s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan,
- u32 vind, bool vlan_on);
-s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
- bool vlan_on, bool *vfta_changed);
-s32 ixgbe_fc_enable(struct ixgbe_hw *hw);
-s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
- u8 ver);
-s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw);
-s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw);
-void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr);
-s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw,
- u16 *firmware_version);
-s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val);
-s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val);
-s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw);
-s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data);
-u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw);
-s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval);
-s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw);
-s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw);
-s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
-s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_hash_dword input,
- union ixgbe_atr_hash_dword common,
- u8 queue);
-s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input_mask);
-s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- u16 soft_id, u8 queue);
-s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- u16 soft_id);
-s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
- union ixgbe_atr_input *input,
- union ixgbe_atr_input *mask,
- u16 soft_id,
- u8 queue);
-void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
- union ixgbe_atr_input *mask);
-u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
- union ixgbe_atr_hash_dword common);
-s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
- u8 *data);
-s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
- u8 data);
-s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data);
-s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
-s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
-s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps);
-s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
-void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
-s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
- u16 *wwpn_prefix);
-s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs);
-
-#endif /* _IXGBE_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
deleted file mode 100644
index e9b9529a..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
+++ /dev/null
@@ -1,4067 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_common.h"
-#include "ixgbe_phy.h"
-#include "ixgbe_api.h"
-
-static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
-static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
-static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw);
-static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
-static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
-static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
- u16 count);
-static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count);
-static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
-static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
-static void ixgbe_release_eeprom(struct ixgbe_hw *hw);
-
-static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
-static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
- u16 *san_mac_offset);
-static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
- u16 offset);
-
-/**
- * ixgbe_init_ops_generic - Inits function ptrs
- * @hw: pointer to the hardware structure
- *
- * Initialize the function pointers.
- **/
-s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- struct ixgbe_mac_info *mac = &hw->mac;
- u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- /* EEPROM */
- eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic;
- /* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */
- if (eec & IXGBE_EEC_PRES) {
- eeprom->ops.read = &ixgbe_read_eerd_generic;
- eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic;
- } else {
- eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic;
- eeprom->ops.read_buffer =
- &ixgbe_read_eeprom_buffer_bit_bang_generic;
- }
- eeprom->ops.write = &ixgbe_write_eeprom_generic;
- eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic;
- eeprom->ops.validate_checksum =
- &ixgbe_validate_eeprom_checksum_generic;
- eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic;
- eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic;
-
- /* MAC */
- mac->ops.init_hw = &ixgbe_init_hw_generic;
- mac->ops.reset_hw = NULL;
- mac->ops.start_hw = &ixgbe_start_hw_generic;
- mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic;
- mac->ops.get_media_type = NULL;
- mac->ops.get_supported_physical_layer = NULL;
- mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic;
- mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic;
- mac->ops.stop_adapter = &ixgbe_stop_adapter_generic;
- mac->ops.get_bus_info = &ixgbe_get_bus_info_generic;
- mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie;
- mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync;
- mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync;
-
- /* LEDs */
- mac->ops.led_on = &ixgbe_led_on_generic;
- mac->ops.led_off = &ixgbe_led_off_generic;
- mac->ops.blink_led_start = &ixgbe_blink_led_start_generic;
- mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic;
-
- /* RAR, Multicast, VLAN */
- mac->ops.set_rar = &ixgbe_set_rar_generic;
- mac->ops.clear_rar = &ixgbe_clear_rar_generic;
- mac->ops.insert_mac_addr = NULL;
- mac->ops.set_vmdq = NULL;
- mac->ops.clear_vmdq = NULL;
- mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic;
- mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic;
- mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic;
- mac->ops.enable_mc = &ixgbe_enable_mc_generic;
- mac->ops.disable_mc = &ixgbe_disable_mc_generic;
- mac->ops.clear_vfta = NULL;
- mac->ops.set_vfta = NULL;
- mac->ops.set_vlvf = NULL;
- mac->ops.init_uta_tables = NULL;
-
- /* Flow Control */
- mac->ops.fc_enable = &ixgbe_fc_enable_generic;
-
- /* Link */
- mac->ops.get_link_capabilities = NULL;
- mac->ops.setup_link = NULL;
- mac->ops.check_link = NULL;
-
- return 0;
-}
-
-/**
- * ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow
- * control
- * @hw: pointer to hardware structure
- *
- * There are several phys that do not support autoneg flow control. This
- * function checks the device id to see if the associated phy supports
- * autoneg flow control.
- **/
-static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
-{
-
- switch (hw->device_id) {
- case IXGBE_DEV_ID_X540T:
- return 0;
- case IXGBE_DEV_ID_82599_T3_LOM:
- return 0;
- default:
- return IXGBE_ERR_FC_NOT_SUPPORTED;
- }
-}
-
-/**
- * ixgbe_setup_fc - Set up flow control
- * @hw: pointer to hardware structure
- *
- * Called at init time to set up flow control.
- **/
-static s32 ixgbe_setup_fc(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
- u32 reg = 0, reg_bp = 0;
- u16 reg_cu = 0;
-
- /*
- * Validate the requested mode. Strict IEEE mode does not allow
- * ixgbe_fc_rx_pause because it will cause us to fail at UNH.
- */
- if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
- hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
- ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
- goto out;
- }
-
- /*
- * 10gig parts do not have a word in the EEPROM to determine the
- * default flow control setting, so we explicitly set it to full.
- */
- if (hw->fc.requested_mode == ixgbe_fc_default)
- hw->fc.requested_mode = ixgbe_fc_full;
-
- /*
- * Set up the 1G and 10G flow control advertisement registers so the
- * HW will be able to do fc autoneg once the cable is plugged in. If
- * we link at 10G, the 1G advertisement is harmless and vice versa.
- */
- switch (hw->phy.media_type) {
- case ixgbe_media_type_fiber:
- case ixgbe_media_type_backplane:
- reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
- reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- break;
- case ixgbe_media_type_copper:
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg_cu);
- break;
- default:
- break;
- }
-
- /*
- * The possible values of fc.requested_mode are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause frames,
- * but not send pause frames).
- * 2: Tx flow control is enabled (we can send pause frames but
- * we do not support receiving pause frames).
- * 3: Both Rx and Tx flow control (symmetric) are enabled.
- * other: Invalid.
- */
- switch (hw->fc.requested_mode) {
- case ixgbe_fc_none:
- /* Flow control completely disabled by software override. */
- reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE);
- if (hw->phy.media_type == ixgbe_media_type_backplane)
- reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE |
- IXGBE_AUTOC_ASM_PAUSE);
- else if (hw->phy.media_type == ixgbe_media_type_copper)
- reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE);
- break;
- case ixgbe_fc_tx_pause:
- /*
- * Tx Flow control is enabled, and Rx Flow control is
- * disabled by software override.
- */
- reg |= IXGBE_PCS1GANA_ASM_PAUSE;
- reg &= ~IXGBE_PCS1GANA_SYM_PAUSE;
- if (hw->phy.media_type == ixgbe_media_type_backplane) {
- reg_bp |= IXGBE_AUTOC_ASM_PAUSE;
- reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE;
- } else if (hw->phy.media_type == ixgbe_media_type_copper) {
- reg_cu |= IXGBE_TAF_ASM_PAUSE;
- reg_cu &= ~IXGBE_TAF_SYM_PAUSE;
- }
- break;
- case ixgbe_fc_rx_pause:
- /*
- * Rx Flow control is enabled and Tx Flow control is
- * disabled by software override. Since there really
- * isn't a way to advertise that we are capable of RX
- * Pause ONLY, we will advertise that we support both
- * symmetric and asymmetric Rx PAUSE, as such we fall
- * through to the fc_full statement. Later, we will
- * disable the adapter's ability to send PAUSE frames.
- */
- case ixgbe_fc_full:
- /* Flow control (both Rx and Tx) is enabled by SW override. */
- reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE;
- if (hw->phy.media_type == ixgbe_media_type_backplane)
- reg_bp |= IXGBE_AUTOC_SYM_PAUSE |
- IXGBE_AUTOC_ASM_PAUSE;
- else if (hw->phy.media_type == ixgbe_media_type_copper)
- reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE;
- break;
- default:
- hw_dbg(hw, "Flow control param set incorrectly\n");
- ret_val = IXGBE_ERR_CONFIG;
- goto out;
- break;
- }
-
- if (hw->mac.type != ixgbe_mac_X540) {
- /*
- * Enable auto-negotiation between the MAC & PHY;
- * the MAC will advertise clause 37 flow control.
- */
- IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg);
- reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
-
- /* Disable AN timeout */
- if (hw->fc.strict_ieee)
- reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN;
-
- IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg);
- hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg);
- }
-
- /*
- * AUTOC restart handles negotiation of 1G and 10G on backplane
- * and copper. There is no need to set the PCS1GCTL register.
- *
- */
- if (hw->phy.media_type == ixgbe_media_type_backplane) {
- reg_bp |= IXGBE_AUTOC_AN_RESTART;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp);
- } else if ((hw->phy.media_type == ixgbe_media_type_copper) &&
- (ixgbe_device_supports_autoneg_fc(hw) == 0)) {
- hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu);
- }
-
- hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg);
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_start_hw_generic - Prepare hardware for Tx/Rx
- * @hw: pointer to hardware structure
- *
- * Starts the hardware by filling the bus info structure and media type, clears
- * all on chip counters, initializes receive address registers, multicast
- * table, VLAN filter table, calls routine to set up link and flow control
- * settings, and leaves transmit and receive units disabled and uninitialized
- **/
-s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
-{
- s32 ret_val;
- u32 ctrl_ext;
-
- /* Set the media type */
- hw->phy.media_type = hw->mac.ops.get_media_type(hw);
-
- /* PHY ops initialization must be done in reset_hw() */
-
- /* Clear the VLAN filter table */
- hw->mac.ops.clear_vfta(hw);
-
- /* Clear statistics registers */
- hw->mac.ops.clear_hw_cntrs(hw);
-
- /* Set No Snoop Disable */
- ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
- ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS;
- IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Setup flow control */
- ret_val = ixgbe_setup_fc(hw);
- if (ret_val != 0)
- goto out;
-
- /* Clear adapter stopped flag */
- hw->adapter_stopped = false;
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_start_hw_gen2 - Init sequence for common device family
- * @hw: pointer to hw structure
- *
- * Performs the init sequence common to the second generation
- * of 10 GbE devices.
- * Devices in the second generation:
- * 82599
- * X540
- **/
-s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
-{
- u32 i;
- u32 regval;
-
- /* Clear the rate limiters */
- for (i = 0; i < hw->mac.max_tx_queues; i++) {
- IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
- IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
- }
- IXGBE_WRITE_FLUSH(hw);
-
- /* Disable relaxed ordering */
- for (i = 0; i < hw->mac.max_tx_queues; i++) {
- regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
- regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
- IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
- }
-
- for (i = 0; i < hw->mac.max_rx_queues; i++) {
- regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
- regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
- IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
- IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_init_hw_generic - Generic hardware initialization
- * @hw: pointer to hardware structure
- *
- * Initialize the hardware by resetting the hardware, filling the bus info
- * structure and media type, clears all on chip counters, initializes receive
- * address registers, multicast table, VLAN filter table, calls routine to set
- * up link and flow control settings, and leaves transmit and receive units
- * disabled and uninitialized
- **/
-s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
-{
- s32 status;
-
- /* Reset the hardware */
- status = hw->mac.ops.reset_hw(hw);
-
- if (status == 0) {
- /* Start the HW */
- status = hw->mac.ops.start_hw(hw);
- }
-
- return status;
-}
-
-/**
- * ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters
- * @hw: pointer to hardware structure
- *
- * Clears all hardware statistics counters by reading them from the hardware.
- * Statistics counters are clear on read.
- **/
-s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
-{
- u16 i = 0;
-
- IXGBE_READ_REG(hw, IXGBE_CRCERRS);
- IXGBE_READ_REG(hw, IXGBE_ILLERRC);
- IXGBE_READ_REG(hw, IXGBE_ERRBC);
- IXGBE_READ_REG(hw, IXGBE_MSPDC);
- for (i = 0; i < 8; i++)
- IXGBE_READ_REG(hw, IXGBE_MPC(i));
-
- IXGBE_READ_REG(hw, IXGBE_MLFC);
- IXGBE_READ_REG(hw, IXGBE_MRFC);
- IXGBE_READ_REG(hw, IXGBE_RLEC);
- IXGBE_READ_REG(hw, IXGBE_LXONTXC);
- IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
- if (hw->mac.type >= ixgbe_mac_82599EB) {
- IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
- IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
- } else {
- IXGBE_READ_REG(hw, IXGBE_LXONRXC);
- IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
- }
-
- for (i = 0; i < 8; i++) {
- IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
- IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
- if (hw->mac.type >= ixgbe_mac_82599EB) {
- IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
- IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
- } else {
- IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
- IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
- }
- }
- if (hw->mac.type >= ixgbe_mac_82599EB)
- for (i = 0; i < 8; i++)
- IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
- IXGBE_READ_REG(hw, IXGBE_PRC64);
- IXGBE_READ_REG(hw, IXGBE_PRC127);
- IXGBE_READ_REG(hw, IXGBE_PRC255);
- IXGBE_READ_REG(hw, IXGBE_PRC511);
- IXGBE_READ_REG(hw, IXGBE_PRC1023);
- IXGBE_READ_REG(hw, IXGBE_PRC1522);
- IXGBE_READ_REG(hw, IXGBE_GPRC);
- IXGBE_READ_REG(hw, IXGBE_BPRC);
- IXGBE_READ_REG(hw, IXGBE_MPRC);
- IXGBE_READ_REG(hw, IXGBE_GPTC);
- IXGBE_READ_REG(hw, IXGBE_GORCL);
- IXGBE_READ_REG(hw, IXGBE_GORCH);
- IXGBE_READ_REG(hw, IXGBE_GOTCL);
- IXGBE_READ_REG(hw, IXGBE_GOTCH);
- if (hw->mac.type == ixgbe_mac_82598EB)
- for (i = 0; i < 8; i++)
- IXGBE_READ_REG(hw, IXGBE_RNBC(i));
- IXGBE_READ_REG(hw, IXGBE_RUC);
- IXGBE_READ_REG(hw, IXGBE_RFC);
- IXGBE_READ_REG(hw, IXGBE_ROC);
- IXGBE_READ_REG(hw, IXGBE_RJC);
- IXGBE_READ_REG(hw, IXGBE_MNGPRC);
- IXGBE_READ_REG(hw, IXGBE_MNGPDC);
- IXGBE_READ_REG(hw, IXGBE_MNGPTC);
- IXGBE_READ_REG(hw, IXGBE_TORL);
- IXGBE_READ_REG(hw, IXGBE_TORH);
- IXGBE_READ_REG(hw, IXGBE_TPR);
- IXGBE_READ_REG(hw, IXGBE_TPT);
- IXGBE_READ_REG(hw, IXGBE_PTC64);
- IXGBE_READ_REG(hw, IXGBE_PTC127);
- IXGBE_READ_REG(hw, IXGBE_PTC255);
- IXGBE_READ_REG(hw, IXGBE_PTC511);
- IXGBE_READ_REG(hw, IXGBE_PTC1023);
- IXGBE_READ_REG(hw, IXGBE_PTC1522);
- IXGBE_READ_REG(hw, IXGBE_MPTC);
- IXGBE_READ_REG(hw, IXGBE_BPTC);
- for (i = 0; i < 16; i++) {
- IXGBE_READ_REG(hw, IXGBE_QPRC(i));
- IXGBE_READ_REG(hw, IXGBE_QPTC(i));
- if (hw->mac.type >= ixgbe_mac_82599EB) {
- IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
- IXGBE_READ_REG(hw, IXGBE_QBRC_H(i));
- IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
- IXGBE_READ_REG(hw, IXGBE_QBTC_H(i));
- IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
- } else {
- IXGBE_READ_REG(hw, IXGBE_QBRC(i));
- IXGBE_READ_REG(hw, IXGBE_QBTC(i));
- }
- }
-
- if (hw->mac.type == ixgbe_mac_X540) {
- if (hw->phy.id == 0)
- ixgbe_identify_phy(hw);
- hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL,
- IXGBE_MDIO_PCS_DEV_TYPE, &i);
- hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH,
- IXGBE_MDIO_PCS_DEV_TYPE, &i);
- hw->phy.ops.read_reg(hw, IXGBE_LDPCECL,
- IXGBE_MDIO_PCS_DEV_TYPE, &i);
- hw->phy.ops.read_reg(hw, IXGBE_LDPCECH,
- IXGBE_MDIO_PCS_DEV_TYPE, &i);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_read_pba_string_generic - Reads part number string from EEPROM
- * @hw: pointer to hardware structure
- * @pba_num: stores the part number string from the EEPROM
- * @pba_num_size: part number string buffer length
- *
- * Reads the part number string from the EEPROM.
- **/
-s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
- u32 pba_num_size)
-{
- s32 ret_val;
- u16 data;
- u16 pba_ptr;
- u16 offset;
- u16 length;
-
- if (pba_num == NULL) {
- hw_dbg(hw, "PBA string buffer was null\n");
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data);
- if (ret_val) {
- hw_dbg(hw, "NVM Read Error\n");
- return ret_val;
- }
-
- ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr);
- if (ret_val) {
- hw_dbg(hw, "NVM Read Error\n");
- return ret_val;
- }
-
- /*
- * If data is not the pointer guard, the PBA must be in legacy format,
- * which means pba_ptr is actually our second data word for the PBA number
- * and we can decode it into an ASCII string.
- */
- if (data != IXGBE_PBANUM_PTR_GUARD) {
- hw_dbg(hw, "NVM PBA number is not stored as string\n");
-
- /* we will need 11 characters to store the PBA */
- if (pba_num_size < 11) {
- hw_dbg(hw, "PBA string buffer too small\n");
- return IXGBE_ERR_NO_SPACE;
- }
-
- /* extract hex string from data and pba_ptr */
- pba_num[0] = (data >> 12) & 0xF;
- pba_num[1] = (data >> 8) & 0xF;
- pba_num[2] = (data >> 4) & 0xF;
- pba_num[3] = data & 0xF;
- pba_num[4] = (pba_ptr >> 12) & 0xF;
- pba_num[5] = (pba_ptr >> 8) & 0xF;
- pba_num[6] = '-';
- pba_num[7] = 0;
- pba_num[8] = (pba_ptr >> 4) & 0xF;
- pba_num[9] = pba_ptr & 0xF;
-
- /* put a null character on the end of our string */
- pba_num[10] = '\0';
-
- /* switch all the data but the '-' to hex char */
- for (offset = 0; offset < 10; offset++) {
- if (pba_num[offset] < 0xA)
- pba_num[offset] += '0';
- else if (pba_num[offset] < 0x10)
- pba_num[offset] += 'A' - 0xA;
- }
-
- return 0;
- }
-
- ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length);
- if (ret_val) {
- hw_dbg(hw, "NVM Read Error\n");
- return ret_val;
- }
-
- if (length == 0xFFFF || length == 0) {
- hw_dbg(hw, "NVM PBA number section invalid length\n");
- return IXGBE_ERR_PBA_SECTION;
- }
-
- /* check if pba_num buffer is big enough */
- if (pba_num_size < (((u32)length * 2) - 1)) {
- hw_dbg(hw, "PBA string buffer too small\n");
- return IXGBE_ERR_NO_SPACE;
- }
-
- /* trim pba length from start of string */
- pba_ptr++;
- length--;
-
- for (offset = 0; offset < length; offset++) {
- ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data);
- if (ret_val) {
- hw_dbg(hw, "NVM Read Error\n");
- return ret_val;
- }
- pba_num[offset * 2] = (u8)(data >> 8);
- pba_num[(offset * 2) + 1] = (u8)(data & 0xFF);
- }
- pba_num[offset * 2] = '\0';
-
- return 0;
-}
-
-/**
- * ixgbe_get_mac_addr_generic - Generic get MAC address
- * @hw: pointer to hardware structure
- * @mac_addr: Adapter MAC address
- *
- * Reads the adapter's MAC address from the first Receive Address Register (RAR0).
- * A reset of the adapter must be performed prior to calling this function
- * in order for the MAC address to have been loaded from the EEPROM into RAR0
- **/
-s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
-{
- u32 rar_high;
- u32 rar_low;
- u16 i;
-
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0));
- rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0));
-
- for (i = 0; i < 4; i++)
- mac_addr[i] = (u8)(rar_low >> (i*8));
-
- for (i = 0; i < 2; i++)
- mac_addr[i+4] = (u8)(rar_high >> (i*8));
-
- return 0;
-}
-
-/**
- * ixgbe_get_bus_info_generic - Generic set PCI bus info
- * @hw: pointer to hardware structure
- *
- * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
- **/
-s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- u16 link_status;
-
- hw->bus.type = ixgbe_bus_type_pci_express;
-
- /* Get the negotiated link width and speed from PCI config space */
- link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
-
- switch (link_status & IXGBE_PCI_LINK_WIDTH) {
- case IXGBE_PCI_LINK_WIDTH_1:
- hw->bus.width = ixgbe_bus_width_pcie_x1;
- break;
- case IXGBE_PCI_LINK_WIDTH_2:
- hw->bus.width = ixgbe_bus_width_pcie_x2;
- break;
- case IXGBE_PCI_LINK_WIDTH_4:
- hw->bus.width = ixgbe_bus_width_pcie_x4;
- break;
- case IXGBE_PCI_LINK_WIDTH_8:
- hw->bus.width = ixgbe_bus_width_pcie_x8;
- break;
- default:
- hw->bus.width = ixgbe_bus_width_unknown;
- break;
- }
-
- switch (link_status & IXGBE_PCI_LINK_SPEED) {
- case IXGBE_PCI_LINK_SPEED_2500:
- hw->bus.speed = ixgbe_bus_speed_2500;
- break;
- case IXGBE_PCI_LINK_SPEED_5000:
- hw->bus.speed = ixgbe_bus_speed_5000;
- break;
- case IXGBE_PCI_LINK_SPEED_8000:
- hw->bus.speed = ixgbe_bus_speed_8000;
- break;
- default:
- hw->bus.speed = ixgbe_bus_speed_unknown;
- break;
- }
-
- mac->ops.set_lan_id(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
- * @hw: pointer to the HW structure
- *
- * Determines the LAN function id by reading memory-mapped registers
- * and swaps the port value if requested.
- **/
-void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
-{
- struct ixgbe_bus_info *bus = &hw->bus;
- u32 reg;
-
- reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
- bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
- bus->lan_id = bus->func;
-
- /* check for a port swap */
- reg = IXGBE_READ_REG(hw, IXGBE_FACTPS);
- if (reg & IXGBE_FACTPS_LFS)
- bus->func ^= 0x1;
-}
-
-/**
- * ixgbe_stop_adapter_generic - Generic stop Tx/Rx units
- * @hw: pointer to hardware structure
- *
- * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
- * disables transmit and receive units. The adapter_stopped flag is used by
- * the shared code and drivers to determine if the adapter is in a stopped
- * state and should not touch the hardware.
- **/
-s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
-{
- u32 reg_val;
- u16 i;
-
- /*
- * Set the adapter_stopped flag so other driver functions stop touching
- * the hardware
- */
- hw->adapter_stopped = true;
-
- /* Disable the receive unit */
- IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0);
-
- /* Clear interrupt mask to stop interrupts from being generated */
- IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
-
- /* Clear any pending interrupts, flush previous writes */
- IXGBE_READ_REG(hw, IXGBE_EICR);
-
- /* Disable the transmit unit. Each queue must be disabled. */
- for (i = 0; i < hw->mac.max_tx_queues; i++)
- IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH);
-
- /* Disable the receive unit by stopping each queue */
- for (i = 0; i < hw->mac.max_rx_queues; i++) {
- reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
- reg_val &= ~IXGBE_RXDCTL_ENABLE;
- reg_val |= IXGBE_RXDCTL_SWFLSH;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val);
- }
-
- /* flush all queues disables */
- IXGBE_WRITE_FLUSH(hw);
- msleep(2);
-
- /*
- * Prevent the PCI-E bus from hanging by disabling PCI-E master
- * access and verify no pending requests
- */
- return ixgbe_disable_pcie_master(hw);
-}
-
-/**
- * ixgbe_led_on_generic - Turns on the software controllable LEDs.
- * @hw: pointer to hardware structure
- * @index: led number to turn on
- **/
-s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
-{
- u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
-
- /* To turn on the LED, set mode to ON. */
- led_reg &= ~IXGBE_LED_MODE_MASK(index);
- led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_led_off_generic - Turns off the software controllable LEDs.
- * @hw: pointer to hardware structure
- * @index: led number to turn off
- **/
-s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
-{
- u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
-
- /* To turn off the LED, set mode to OFF. */
- led_reg &= ~IXGBE_LED_MODE_MASK(index);
- led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_init_eeprom_params_generic - Initialize EEPROM params
- * @hw: pointer to hardware structure
- *
- * Initializes the EEPROM parameters ixgbe_eeprom_info within the
- * ixgbe_hw struct in order to set up EEPROM access.
- **/
-s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- u32 eec;
- u16 eeprom_size;
-
- if (eeprom->type == ixgbe_eeprom_uninitialized) {
- eeprom->type = ixgbe_eeprom_none;
- /* Set default semaphore delay to 10ms which is a well
- * tested value */
- eeprom->semaphore_delay = 10;
- /* Clear EEPROM page size, it will be initialized as needed */
- eeprom->word_page_size = 0;
-
- /*
- * Check for EEPROM present first.
- * If not present leave as none
- */
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
- if (eec & IXGBE_EEC_PRES) {
- eeprom->type = ixgbe_eeprom_spi;
-
- /*
- * SPI EEPROM is assumed here. This code would need to
- * change if a future EEPROM is not SPI.
- */
- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
- IXGBE_EEC_SIZE_SHIFT);
- eeprom->word_size = 1 << (eeprom_size +
- IXGBE_EEPROM_WORD_SIZE_SHIFT);
- }
-
- if (eec & IXGBE_EEC_ADDR_SIZE)
- eeprom->address_bits = 16;
- else
- eeprom->address_bits = 8;
- hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: "
- "%d\n", eeprom->type, eeprom->word_size,
- eeprom->address_bits);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to write
- * @words: number of word(s)
- * @data: 16 bit word(s) to write to EEPROM
- *
- * Writes 16 bit word(s) to the EEPROM through the bit-bang method
- **/
-s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- s32 status = 0;
- u16 i, count;
-
- hw->eeprom.ops.init_params(hw);
-
- if (words == 0) {
- status = IXGBE_ERR_INVALID_ARGUMENT;
- goto out;
- }
-
- if (offset + words > hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- /*
- * The EEPROM page size cannot be queried from the chip. We do lazy
- * initialization. It is worth doing when writing a large buffer.
- */
- if ((hw->eeprom.word_page_size == 0) &&
- (words > IXGBE_EEPROM_PAGE_SIZE_MAX))
- ixgbe_detect_eeprom_page_size_generic(hw, offset);
-
- /*
- * We cannot hold the synchronization semaphores for too long,
- * to avoid starving other entities. However, it is more efficient
- * to write in bursts than to synchronize access for each word.
- */
- for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
- count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
- IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
- status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i,
- count, &data[i]);
-
- if (status != 0)
- break;
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be written to
- * @words: number of word(s)
- * @data: 16 bit word(s) to be written to the EEPROM
- *
- * If ixgbe_eeprom_update_checksum is not called after this function, the
- * EEPROM will most likely contain an invalid checksum.
- **/
-static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- s32 status;
- u16 word;
- u16 page_size;
- u16 i;
- u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
-
- /* Prepare the EEPROM for writing */
- status = ixgbe_acquire_eeprom(hw);
-
- if (status == 0) {
- if (ixgbe_ready_eeprom(hw) != 0) {
- ixgbe_release_eeprom(hw);
- status = IXGBE_ERR_EEPROM;
- }
- }
-
- if (status == 0) {
- for (i = 0; i < words; i++) {
- ixgbe_standby_eeprom(hw);
-
- /* Send the WRITE ENABLE command (8 bit opcode ) */
- ixgbe_shift_out_eeprom_bits(hw,
- IXGBE_EEPROM_WREN_OPCODE_SPI,
- IXGBE_EEPROM_OPCODE_BITS);
-
- ixgbe_standby_eeprom(hw);
-
- /*
- * Some SPI eeproms use the 8th address bit embedded
- * in the opcode
- */
- if ((hw->eeprom.address_bits == 8) &&
- ((offset + i) >= 128))
- write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
-
- /* Send the Write command (8-bit opcode + addr) */
- ixgbe_shift_out_eeprom_bits(hw, write_opcode,
- IXGBE_EEPROM_OPCODE_BITS);
- ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
- hw->eeprom.address_bits);
-
- page_size = hw->eeprom.word_page_size;
-
- /* Send the data in burst via SPI*/
- do {
- word = data[i];
- word = (word >> 8) | (word << 8);
- ixgbe_shift_out_eeprom_bits(hw, word, 16);
-
- if (page_size == 0)
- break;
-
- /* do not wrap around page */
- if (((offset + i) & (page_size - 1)) ==
- (page_size - 1))
- break;
- } while (++i < words);
-
- ixgbe_standby_eeprom(hw);
- msleep(10);
- }
- /* Done with writing - release the EEPROM */
- ixgbe_release_eeprom(hw);
- }
-
- return status;
-}
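/*
 * Illustrative sketch (not from the original sources): the page-boundary
 * test used by the write burst above. An SPI EEPROM write must not cross a
 * page, so the burst stops when the current word offset is the last one in
 * its page. page_size is assumed to be a power of two, as in the driver.
 */
#include <stdint.h>
#include <stdio.h>

static int last_word_in_page(uint16_t offset, uint16_t page_size)
{
	return (offset & (page_size - 1)) == (page_size - 1);
}

int main(void)
{
	uint16_t page_size = 8;	/* hypothetical page size, in 16-bit words */
	uint16_t off;

	for (off = 0; off < 20; off++)
		if (last_word_in_page(off, page_size))
			printf("burst stops after word offset %u\n",
			       (unsigned int)off);
	return 0;
}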
-
-/**
- * ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be written to
- * @data: 16 bit word to be written to the EEPROM
- *
- * If ixgbe_eeprom_update_checksum is not called after this function, the
- * EEPROM will most likely contain an invalid checksum.
- **/
-s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
-{
- s32 status;
-
- hw->eeprom.ops.init_params(hw);
-
- if (offset >= hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data);
-
-out:
- return status;
-}
-
-/**
- * ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be read
- * @words: number of word(s)
- * @data: read 16 bit word(s) from EEPROM
- *
- * Reads 16 bit word(s) from EEPROM through bit-bang method
- **/
-s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- s32 status = 0;
- u16 i, count;
-
- hw->eeprom.ops.init_params(hw);
-
- if (words == 0) {
- status = IXGBE_ERR_INVALID_ARGUMENT;
- goto out;
- }
-
- if (offset + words > hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- /*
- * We cannot hold the synchronization semaphores for too long,
- * to avoid starving other entities. However, it is more efficient
- * to read in bursts than to synchronize access for each word.
- */
- for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
- count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
- IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
-
- status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i,
- count, &data[i]);
-
- if (status != 0)
- break;
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be read
- * @words: number of word(s)
- * @data: read 16 bit word(s) from EEPROM
- *
- * Reads 16 bit word(s) from EEPROM through bit-bang method
- **/
-static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- s32 status;
- u16 word_in;
- u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI;
- u16 i;
-
- /* Prepare the EEPROM for reading */
- status = ixgbe_acquire_eeprom(hw);
-
- if (status == 0) {
- if (ixgbe_ready_eeprom(hw) != 0) {
- ixgbe_release_eeprom(hw);
- status = IXGBE_ERR_EEPROM;
- }
- }
-
- if (status == 0) {
- for (i = 0; i < words; i++) {
- ixgbe_standby_eeprom(hw);
- /*
- * Some SPI eeproms use the 8th address bit embedded
- * in the opcode
- */
- if ((hw->eeprom.address_bits == 8) &&
- ((offset + i) >= 128))
- read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
-
- /* Send the READ command (opcode + addr) */
- ixgbe_shift_out_eeprom_bits(hw, read_opcode,
- IXGBE_EEPROM_OPCODE_BITS);
- ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
- hw->eeprom.address_bits);
-
- /* Read the data. */
- word_in = ixgbe_shift_in_eeprom_bits(hw, 16);
- data[i] = (word_in >> 8) | (word_in << 8);
- }
-
- /* End this read operation */
- ixgbe_release_eeprom(hw);
- }
-
- return status;
-}
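/*
 * Illustrative sketch (not from the original sources): the 16-bit byte swap
 * used when moving words between SPI wire order and host order, as in
 * "data[i] = (word_in >> 8) | (word_in << 8)" above.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t swap16(uint16_t w)
{
	return (uint16_t)((w >> 8) | (w << 8));
}

int main(void)
{
	printf("0x%04X -> 0x%04X\n", 0x1234u, (unsigned int)swap16(0x1234));
	return 0;
}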
-
-/**
- * ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be read
- * @data: read 16 bit value from EEPROM
- *
- * Reads 16 bit value from EEPROM through bit-bang method
- **/
-s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 *data)
-{
- s32 status;
-
- hw->eeprom.ops.init_params(hw);
-
- if (offset >= hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
-
-out:
- return status;
-}
-
-/**
- * ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to read
- * @words: number of word(s)
- * @data: 16 bit word(s) from the EEPROM
- *
- * Reads 16 bit word(s) from the EEPROM using the EERD register.
- **/
-s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- u32 eerd;
- s32 status = 0;
- u32 i;
-
- hw->eeprom.ops.init_params(hw);
-
- if (words == 0) {
- status = IXGBE_ERR_INVALID_ARGUMENT;
- goto out;
- }
-
- if (offset >= hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- for (i = 0; i < words; i++) {
- eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) +
- IXGBE_EEPROM_RW_REG_START;
-
- IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd);
- status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ);
-
- if (status == 0) {
- data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >>
- IXGBE_EEPROM_RW_REG_DATA);
- } else {
- hw_dbg(hw, "Eeprom read timed out\n");
- goto out;
- }
- }
-out:
- return status;
-}
-
-/**
- * ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size
- * @hw: pointer to hardware structure
- * @offset: offset within the EEPROM to be used as a scratch pad
- *
- * Discovers the EEPROM page size by writing marching data at the given
- * offset. This function is called only when we are writing a new large
- * buffer at the given offset, so the data would be overwritten anyway.
- **/
-static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
- u16 offset)
-{
- u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX];
- s32 status = 0;
- u16 i;
-
- for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++)
- data[i] = i;
-
- hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX;
- status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset,
- IXGBE_EEPROM_PAGE_SIZE_MAX, data);
- hw->eeprom.word_page_size = 0;
- if (status != 0)
- goto out;
-
- status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
- if (status != 0)
- goto out;
-
- /*
- * When writing in burst more than the actual page size
- * EEPROM address wraps around current page.
- */
- hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0];
-
- hw_dbg(hw, "Detected EEPROM page size = %d words.",
- hw->eeprom.word_page_size);
-out:
- return status;
-}
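/*
 * Illustrative sketch (not from the original sources): why writing a marching
 * pattern longer than the device page recovers the page size. The model
 * assumes the burst starts on a page boundary, the address wraps within one
 * device page, and the detection length is a multiple of the page size.
 */
#include <stdint.h>
#include <stdio.h>

#define DETECT_MAX 128	/* stand-in for IXGBE_EEPROM_PAGE_SIZE_MAX */

static uint16_t detect_page_size(uint16_t true_page_size)
{
	uint16_t page[DETECT_MAX] = { 0 };
	uint16_t i;

	/* One long burst: the device wraps the address inside its page. */
	for (i = 0; i < DETECT_MAX; i++)
		page[i % true_page_size] = i;

	/* The driver reads back the first word and subtracts it from MAX. */
	return (uint16_t)(DETECT_MAX - page[0]);
}

int main(void)
{
	printf("detected page size: %u words\n",
	       (unsigned int)detect_page_size(16));	/* prints 16 */
	return 0;
}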
-
-/**
- * ixgbe_read_eerd_generic - Read EEPROM word using EERD
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to read
- * @data: word read from the EEPROM
- *
- * Reads a 16 bit word from the EEPROM using the EERD register.
- **/
-s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
-{
- return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data);
-}
-
-/**
- * ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to write
- * @words: number of word(s)
- * @data: word(s) to write to the EEPROM
- *
- * Writes 16 bit word(s) to the EEPROM using the EEWR register.
- **/
-s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data)
-{
- u32 eewr;
- s32 status = 0;
- u16 i;
-
- hw->eeprom.ops.init_params(hw);
-
- if (words == 0) {
- status = IXGBE_ERR_INVALID_ARGUMENT;
- goto out;
- }
-
- if (offset >= hw->eeprom.word_size) {
- status = IXGBE_ERR_EEPROM;
- goto out;
- }
-
- for (i = 0; i < words; i++) {
- eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) |
- (data[i] << IXGBE_EEPROM_RW_REG_DATA) |
- IXGBE_EEPROM_RW_REG_START;
-
- status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
- if (status != 0) {
- hw_dbg(hw, "Eeprom write EEWR timed out\n");
- goto out;
- }
-
- IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr);
-
- status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
- if (status != 0) {
- hw_dbg(hw, "Eeprom write EEWR timed out\n");
- goto out;
- }
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_write_eewr_generic - Write EEPROM word using EEWR
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to write
- * @data: word to write to the EEPROM
- *
- * Writes a 16 bit word to the EEPROM using the EEWR register.
- **/
-s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
-{
- return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data);
-}
-
-/**
- * ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status
- * @hw: pointer to hardware structure
- * @ee_reg: EEPROM flag for polling
- *
- * Polls the status bit (bit 1) of the EERD or EEWR to determine when the
- * read or write is done respectively.
- **/
-s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
-{
- u32 i;
- u32 reg;
- s32 status = IXGBE_ERR_EEPROM;
-
- for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) {
- if (ee_reg == IXGBE_NVM_POLL_READ)
- reg = IXGBE_READ_REG(hw, IXGBE_EERD);
- else
- reg = IXGBE_READ_REG(hw, IXGBE_EEWR);
-
- if (reg & IXGBE_EEPROM_RW_REG_DONE) {
- status = 0;
- break;
- }
- udelay(5);
- }
- return status;
-}
-
-/**
- * ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang
- * @hw: pointer to hardware structure
- *
- * Prepares EEPROM for access using bit-bang method. This function should
- * be called before issuing a command to the EEPROM.
- **/
-static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u32 eec;
- u32 i;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)
- != 0)
- status = IXGBE_ERR_SWFW_SYNC;
-
- if (status == 0) {
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- /* Request EEPROM Access */
- eec |= IXGBE_EEC_REQ;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
-
- for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) {
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
- if (eec & IXGBE_EEC_GNT)
- break;
- udelay(5);
- }
-
- /* Release if grant not acquired */
- if (!(eec & IXGBE_EEC_GNT)) {
- eec &= ~IXGBE_EEC_REQ;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- hw_dbg(hw, "Could not acquire EEPROM grant\n");
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
- status = IXGBE_ERR_EEPROM;
- }
-
- /* Setup EEPROM for Read/Write */
- if (status == 0) {
- /* Clear CS and SK */
- eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK);
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
- udelay(1);
- }
- }
- return status;
-}
-
-/**
- * ixgbe_get_eeprom_semaphore - Get hardware semaphore
- * @hw: pointer to hardware structure
- *
- * Sets the hardware semaphores so EEPROM access can occur for bit-bang method
- **/
-static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_EEPROM;
- u32 timeout = 2000;
- u32 i;
- u32 swsm;
-
- /* Get SMBI software semaphore between device drivers first */
- for (i = 0; i < timeout; i++) {
- /*
- * If the SMBI bit is 0 when we read it, then the bit will be
- * set and we have the semaphore
- */
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- if (!(swsm & IXGBE_SWSM_SMBI)) {
- status = 0;
- break;
- }
- udelay(50);
- }
-
- if (i == timeout) {
- hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore "
- "not granted.\n");
- /*
- * this release is particularly important because our attempts
- * above to get the semaphore may have succeeded, and if there
- * was a timeout, we should unconditionally clear the semaphore
- * bits to free the driver to make progress
- */
- ixgbe_release_eeprom_semaphore(hw);
-
- udelay(50);
- /*
- * one last try
- * If the SMBI bit is 0 when we read it, then the bit will be
- * set and we have the semaphore
- */
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- if (!(swsm & IXGBE_SWSM_SMBI))
- status = 0;
- }
-
- /* Now get the semaphore between SW/FW through the SWESMBI bit */
- if (status == 0) {
- for (i = 0; i < timeout; i++) {
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
-
- /* Set the SW EEPROM semaphore bit to request access */
- swsm |= IXGBE_SWSM_SWESMBI;
- IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
-
- /*
- * If we set the bit successfully then we got the
- * semaphore.
- */
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- if (swsm & IXGBE_SWSM_SWESMBI)
- break;
-
- udelay(50);
- }
-
- /*
- * Release semaphores and return error if SW EEPROM semaphore
- * was not granted because we don't have access to the EEPROM
- */
- if (i >= timeout) {
- hw_dbg(hw, "SWESMBI Software EEPROM semaphore "
- "not granted.\n");
- ixgbe_release_eeprom_semaphore(hw);
- status = IXGBE_ERR_EEPROM;
- }
- } else {
- hw_dbg(hw, "Software semaphore SMBI between device drivers "
- "not granted.\n");
- }
-
- return status;
-}
-
-/**
- * ixgbe_release_eeprom_semaphore - Release hardware semaphore
- * @hw: pointer to hardware structure
- *
- * This function clears hardware semaphore bits.
- **/
-static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw)
-{
- u32 swsm;
-
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
-
- /* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */
- swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI);
- IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
- IXGBE_WRITE_FLUSH(hw);
-}
-
-/**
- * ixgbe_ready_eeprom - Polls for EEPROM ready
- * @hw: pointer to hardware structure
- **/
-static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u16 i;
- u8 spi_stat_reg;
-
- /*
- * Read "Status Register" repeatedly until the LSB is cleared. The
- * EEPROM will signal that the command has been completed by clearing
- * bit 0 of the internal status register. If it's not cleared within
- * 5 milliseconds, then error out.
- */
- for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) {
- ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI,
- IXGBE_EEPROM_OPCODE_BITS);
- spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8);
- if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI))
- break;
-
- udelay(5);
- ixgbe_standby_eeprom(hw);
- }
-
- /*
- * On some parts, SPI write time could vary from 0-20mSec on 3.3V
- * devices (and only 0-5mSec on 5V devices)
- */
- if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) {
- hw_dbg(hw, "SPI EEPROM Status error\n");
- status = IXGBE_ERR_EEPROM;
- }
-
- return status;
-}
-
-/**
- * ixgbe_standby_eeprom - Returns EEPROM to a "standby" state
- * @hw: pointer to hardware structure
- **/
-static void ixgbe_standby_eeprom(struct ixgbe_hw *hw)
-{
- u32 eec;
-
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- /* Toggle CS to flush commands */
- eec |= IXGBE_EEC_CS;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
- udelay(1);
- eec &= ~IXGBE_EEC_CS;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
- udelay(1);
-}
-
-/**
- * ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM.
- * @hw: pointer to hardware structure
- * @data: data to send to the EEPROM
- * @count: number of bits to shift out
- **/
-static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
- u16 count)
-{
- u32 eec;
- u32 mask;
- u32 i;
-
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- /*
- * Mask is used to shift "count" bits of "data" out to the EEPROM
- * one bit at a time. Determine the starting bit based on count
- */
- mask = 0x01 << (count - 1);
-
- for (i = 0; i < count; i++) {
- /*
- * A "1" is shifted out to the EEPROM by setting bit "DI" to a
- * "1", and then raising and then lowering the clock (the SK
- * bit controls the clock input to the EEPROM). A "0" is
- * shifted out to the EEPROM by setting "DI" to "0" and then
- * raising and then lowering the clock.
- */
- if (data & mask)
- eec |= IXGBE_EEC_DI;
- else
- eec &= ~IXGBE_EEC_DI;
-
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
-
- udelay(1);
-
- ixgbe_raise_eeprom_clk(hw, &eec);
- ixgbe_lower_eeprom_clk(hw, &eec);
-
- /*
- * Shift mask to signify next bit of data to shift in to the
- * EEPROM
- */
- mask = mask >> 1;
- }
-
- /* We leave the "DI" bit set to "0" when we leave this routine. */
- eec &= ~IXGBE_EEC_DI;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
-}
-
-/**
- * ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM
- * @hw: pointer to hardware structure
- * @count: number of bits to shift in
- **/
-static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count)
-{
- u32 eec;
- u32 i;
- u16 data = 0;
-
- /*
- * In order to read a register from the EEPROM, we need to shift
- * 'count' bits in from the EEPROM. Bits are "shifted in" by raising
- * the clock input to the EEPROM (setting the SK bit), and then reading
- * the value of the "DO" bit. During this "shifting in" process the
- * "DI" bit should always be clear.
- */
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI);
-
- for (i = 0; i < count; i++) {
- data = data << 1;
- ixgbe_raise_eeprom_clk(hw, &eec);
-
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- eec &= ~(IXGBE_EEC_DI);
- if (eec & IXGBE_EEC_DO)
- data |= 1;
-
- ixgbe_lower_eeprom_clk(hw, &eec);
- }
-
- return data;
-}
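/*
 * Illustrative sketch (not from the original sources): the MSB-first shifting
 * scheme used by the bit-bang helpers above, modelled without hardware. Bits
 * of "data" are clocked out starting from bit (count - 1); the receiver
 * rebuilds the same value by shifting each sampled bit in from the right.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t loopback_shift(uint16_t data, unsigned int count)
{
	uint16_t mask = (uint16_t)(1u << (count - 1));
	uint16_t rx = 0;
	unsigned int i;

	for (i = 0; i < count; i++) {
		int bit = (data & mask) != 0;		/* level driven on DI */
		rx = (uint16_t)((rx << 1) | bit);	/* level sampled on DO */
		mask >>= 1;
	}
	return rx;
}

int main(void)
{
	printf("0x%04X -> 0x%04X\n", 0xA5C3u,
	       (unsigned int)loopback_shift(0xA5C3, 16));
	return 0;
}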
-
-/**
- * ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input.
- * @hw: pointer to hardware structure
- * @eec: EEC register's current value
- **/
-static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
-{
- /*
- * Raise the clock input to the EEPROM
- * (setting the SK bit), then delay
- */
- *eec = *eec | IXGBE_EEC_SK;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
- IXGBE_WRITE_FLUSH(hw);
- udelay(1);
-}
-
-/**
- * ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input.
- * @hw: pointer to hardware structure
- * @eec: EEC register's current value
- **/
-static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
-{
- /*
- * Lower the clock input to the EEPROM (clearing the SK bit), then
- * delay
- */
- *eec = *eec & ~IXGBE_EEC_SK;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
- IXGBE_WRITE_FLUSH(hw);
- udelay(1);
-}
-
-/**
- * ixgbe_release_eeprom - Release EEPROM, release semaphores
- * @hw: pointer to hardware structure
- **/
-static void ixgbe_release_eeprom(struct ixgbe_hw *hw)
-{
- u32 eec;
-
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- eec |= IXGBE_EEC_CS; /* Pull CS high */
- eec &= ~IXGBE_EEC_SK; /* Lower SCK */
-
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
- IXGBE_WRITE_FLUSH(hw);
-
- udelay(1);
-
- /* Stop requesting EEPROM access */
- eec &= ~IXGBE_EEC_REQ;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
-
- /* Delay before attempt to obtain semaphore again to allow FW access */
- msleep(hw->eeprom.semaphore_delay);
-}
-
-/**
- * ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum
- * @hw: pointer to hardware structure
- **/
-u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
-{
- u16 i;
- u16 j;
- u16 checksum = 0;
- u16 length = 0;
- u16 pointer = 0;
- u16 word = 0;
-
- /* Include 0x0-0x3F in the checksum */
- for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
- if (hw->eeprom.ops.read(hw, i, &word) != 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- break;
- }
- checksum += word;
- }
-
- /* Include all data from pointers except for the fw pointer */
- for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
- hw->eeprom.ops.read(hw, i, &pointer);
-
- /* Make sure the pointer seems valid */
- if (pointer != 0xFFFF && pointer != 0) {
- hw->eeprom.ops.read(hw, pointer, &length);
-
- if (length != 0xFFFF && length != 0) {
- for (j = pointer+1; j <= pointer+length; j++) {
- hw->eeprom.ops.read(hw, j, &word);
- checksum += word;
- }
- }
- }
- }
-
- checksum = (u16)IXGBE_EEPROM_SUM - checksum;
-
- return checksum;
-}
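/*
 * Illustrative sketch (not from the original sources): the checksum scheme
 * used above. The stored checksum word is chosen so that the 16-bit sum of
 * all covered words equals a fixed target; SUM_TARGET stands in for
 * IXGBE_EEPROM_SUM and its value here is an assumption for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define SUM_TARGET 0xBABA	/* assumed target value */

static uint16_t calc_checksum(const uint16_t *words, unsigned int n)
{
	uint16_t sum = 0;
	unsigned int i;

	for (i = 0; i < n; i++)
		sum = (uint16_t)(sum + words[i]);
	return (uint16_t)(SUM_TARGET - sum);
}

int main(void)
{
	uint16_t words[4] = { 0x1111, 0x2222, 0x3333, 0x4444 };
	uint16_t csum = calc_checksum(words, 4);
	uint16_t sum = (uint16_t)(words[0] + words[1] + words[2] + words[3]);

	/* Adding the checksum back makes the total equal SUM_TARGET. */
	printf("checksum = 0x%04X, total = 0x%04X\n", (unsigned int)csum,
	       (unsigned int)(uint16_t)(sum + csum));
	return 0;
}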
-
-/**
- * ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum
- * @hw: pointer to hardware structure
- * @checksum_val: calculated checksum
- *
- * Performs checksum calculation and validates the EEPROM checksum. If the
- * caller does not need checksum_val, the value can be NULL.
- **/
-s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
- u16 *checksum_val)
-{
- s32 status;
- u16 checksum;
- u16 read_checksum = 0;
-
- /*
- * Read the first word from the EEPROM. If this times out or fails, do
- * not continue or we could be in for a very long wait while every
- * EEPROM read fails
- */
- status = hw->eeprom.ops.read(hw, 0, &checksum);
-
- if (status == 0) {
- checksum = hw->eeprom.ops.calc_checksum(hw);
-
- hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum);
-
- /*
- * Verify read checksum from EEPROM is the same as
- * calculated checksum
- */
- if (read_checksum != checksum)
- status = IXGBE_ERR_EEPROM_CHECKSUM;
-
- /* If the user cares, return the calculated checksum */
- if (checksum_val)
- *checksum_val = checksum;
- } else {
- hw_dbg(hw, "EEPROM read failed\n");
- }
-
- return status;
-}
-
-/**
- * ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
-{
- s32 status;
- u16 checksum;
-
- /*
- * Read the first word from the EEPROM. If this times out or fails, do
- * not continue or we could be in for a very long wait while every
- * EEPROM read fails
- */
- status = hw->eeprom.ops.read(hw, 0, &checksum);
-
- if (status == 0) {
- checksum = hw->eeprom.ops.calc_checksum(hw);
- status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM,
- checksum);
- } else {
- hw_dbg(hw, "EEPROM read failed\n");
- }
-
- return status;
-}
-
-/**
- * ixgbe_validate_mac_addr - Validate MAC address
- * @mac_addr: pointer to MAC address.
- *
- * Tests a MAC address to ensure it is a valid Individual Address
- **/
-s32 ixgbe_validate_mac_addr(u8 *mac_addr)
-{
- s32 status = 0;
-
- /* Make sure it is not a multicast address */
- if (IXGBE_IS_MULTICAST(mac_addr)) {
- hw_dbg(hw, "MAC address is multicast\n");
- status = IXGBE_ERR_INVALID_MAC_ADDR;
- /* Not a broadcast address */
- } else if (IXGBE_IS_BROADCAST(mac_addr)) {
- hw_dbg(hw, "MAC address is broadcast\n");
- status = IXGBE_ERR_INVALID_MAC_ADDR;
- /* Reject the zero address */
- } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
- mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) {
- hw_dbg(hw, "MAC address is all zeros\n");
- status = IXGBE_ERR_INVALID_MAC_ADDR;
- }
- return status;
-}
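/*
 * Illustrative sketch (not from the original sources): the same three checks
 * as ixgbe_validate_mac_addr, written as a standalone helper. A valid
 * individual address has the multicast bit clear and is neither the
 * broadcast address nor all zeros.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int mac_addr_is_valid(const uint8_t mac[6])
{
	static const uint8_t bcast[6] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
	static const uint8_t zero[6];

	if (mac[0] & 0x01)			/* multicast bit set */
		return 0;
	if (memcmp(mac, bcast, 6) == 0)		/* broadcast */
		return 0;
	if (memcmp(mac, zero, 6) == 0)		/* all zeros */
		return 0;
	return 1;
}

int main(void)
{
	uint8_t good[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	uint8_t mcast[6] = { 0x01, 0x00, 0x5E, 0x00, 0x00, 0x01 };

	printf("good: %d, multicast: %d\n",
	       mac_addr_is_valid(good), mac_addr_is_valid(mcast));
	return 0;
}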
-
-/**
- * ixgbe_set_rar_generic - Set Rx address register
- * @hw: pointer to hardware structure
- * @index: Receive address register to write
- * @addr: Address to put into receive address register
- * @vmdq: VMDq "set" or "pool" index
- * @enable_addr: set flag that address is active
- *
- * Puts an ethernet address into a receive address register.
- **/
-s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
- u32 enable_addr)
-{
- u32 rar_low, rar_high;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /* Make sure we are using a valid rar index range */
- if (index >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", index);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- /* setup VMDq pool selection before this RAR gets enabled */
- hw->mac.ops.set_vmdq(hw, index, vmdq);
-
- /*
- * HW expects these in little endian so we reverse the byte
- * order from network order (big endian) to little endian
- */
- rar_low = ((u32)addr[0] |
- ((u32)addr[1] << 8) |
- ((u32)addr[2] << 16) |
- ((u32)addr[3] << 24));
- /*
- * Some parts put the VMDq setting in the extra RAH bits,
- * so save everything except the lower 16 bits that hold part
- * of the address and the address valid bit.
- */
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
- rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
- rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8));
-
- if (enable_addr != 0)
- rar_high |= IXGBE_RAH_AV;
-
- IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low);
- IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
-
- return 0;
-}
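/*
 * Illustrative sketch (not from the original sources): packing a 6-byte MAC
 * address into the little-endian low/high register words the way set_rar
 * does above. RAH_AV_BIT is an assumed stand-in for IXGBE_RAH_AV.
 */
#include <stdint.h>
#include <stdio.h>

#define RAH_AV_BIT (1u << 31)	/* assumed "address valid" position */

static void pack_rar(const uint8_t addr[6], uint32_t *ral, uint32_t *rah)
{
	*ral = (uint32_t)addr[0] |
	       ((uint32_t)addr[1] << 8) |
	       ((uint32_t)addr[2] << 16) |
	       ((uint32_t)addr[3] << 24);
	*rah = (uint32_t)addr[4] | ((uint32_t)addr[5] << 8) | RAH_AV_BIT;
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x1B, 0x21, 0xAA, 0xBB, 0xCC };
	uint32_t ral, rah;

	pack_rar(mac, &ral, &rah);
	printf("RAL = 0x%08X, RAH = 0x%08X\n",
	       (unsigned int)ral, (unsigned int)rah);
	return 0;
}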
-
-/**
- * ixgbe_clear_rar_generic - Remove Rx address register
- * @hw: pointer to hardware structure
- * @index: Receive address register to write
- *
- * Clears an ethernet address from a receive address register.
- **/
-s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
-{
- u32 rar_high;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /* Make sure we are using a valid rar index range */
- if (index >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", index);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- /*
- * Some parts put the VMDq setting in the extra RAH bits,
- * so save everything except the lower 16 bits that hold part
- * of the address and the address valid bit.
- */
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
- rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
-
- IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
- IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
-
- /* clear VMDq pool/queue selection for this RAR */
- hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL);
-
- return 0;
-}
-
-/**
- * ixgbe_init_rx_addrs_generic - Initializes receive address filters.
- * @hw: pointer to hardware structure
- *
- * Places the MAC address in receive address register 0 and clears the rest
- * of the receive address registers. Clears the multicast table. Assumes
- * the receiver is in reset when the routine is called.
- **/
-s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
-{
- u32 i;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /*
- * If the current mac address is valid, assume it is a software override
- * to the permanent address.
- * Otherwise, use the permanent address from the eeprom.
- */
- if (ixgbe_validate_mac_addr(hw->mac.addr) ==
- IXGBE_ERR_INVALID_MAC_ADDR) {
- /* Get the MAC address from the RAR0 for later reference */
- hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
-
- hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ",
- hw->mac.addr[0], hw->mac.addr[1],
- hw->mac.addr[2]);
- hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
- hw->mac.addr[4], hw->mac.addr[5]);
- } else {
- /* Setup the receive address. */
- hw_dbg(hw, "Overriding MAC Address in RAR[0]\n");
- hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ",
- hw->mac.addr[0], hw->mac.addr[1],
- hw->mac.addr[2]);
- hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
- hw->mac.addr[4], hw->mac.addr[5]);
-
- hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
-
- /* clear VMDq pool/queue selection for RAR 0 */
- hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
- }
- hw->addr_ctrl.overflow_promisc = 0;
-
- hw->addr_ctrl.rar_used_count = 1;
-
- /* Zero out the other receive addresses. */
- hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1);
- for (i = 1; i < rar_entries; i++) {
- IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0);
- }
-
- /* Clear the MTA */
- hw->addr_ctrl.mta_in_use = 0;
- IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
-
- hw_dbg(hw, " Clearing MTA\n");
- for (i = 0; i < hw->mac.mcft_size; i++)
- IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
-
- ixgbe_init_uta_tables(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_add_uc_addr - Adds a secondary unicast address.
- * @hw: pointer to hardware structure
- * @addr: new address
- * @vmdq: VMDq pool to assign to the address
- *
- * Adds the address to an unused receive address register, or falls back to
- * promiscuous mode when none is available.
- **/
-void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
-{
- u32 rar_entries = hw->mac.num_rar_entries;
- u32 rar;
-
- hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n",
- addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
-
- /*
- * Place this address in the RAR if there is room,
- * else put the controller into promiscuous mode
- */
- if (hw->addr_ctrl.rar_used_count < rar_entries) {
- rar = hw->addr_ctrl.rar_used_count;
- hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
- hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar);
- hw->addr_ctrl.rar_used_count++;
- } else {
- hw->addr_ctrl.overflow_promisc++;
- }
-
- hw_dbg(hw, "ixgbe_add_uc_addr Complete\n");
-}
-
-/**
- * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses
- * @hw: pointer to hardware structure
- * @addr_list: the list of new addresses
- * @addr_count: number of addresses
- * @next: iterator function to walk the address list
- *
- * The given list replaces any existing list. Clears the secondary addrs from
- * receive address registers. Uses unused receive address registers for the
- * first secondary addresses, and falls back to promiscuous mode as needed.
- *
- * Drivers using secondary unicast addresses must set user_set_promisc when
- * manually putting the device into promiscuous mode.
- **/
-s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
- u32 addr_count, ixgbe_mc_addr_itr next)
-{
- u8 *addr;
- u32 i;
- u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
- u32 uc_addr_in_use;
- u32 fctrl;
- u32 vmdq;
-
- /*
- * Clear accounting of old secondary address list,
- * don't count RAR[0]
- */
- uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1;
- hw->addr_ctrl.rar_used_count -= uc_addr_in_use;
- hw->addr_ctrl.overflow_promisc = 0;
-
- /* Zero out the other receive addresses */
- hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1);
- for (i = 0; i < uc_addr_in_use; i++) {
- IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0);
- }
-
- /* Add the new addresses */
- for (i = 0; i < addr_count; i++) {
- hw_dbg(hw, " Adding the secondary addresses:\n");
- addr = next(hw, &addr_list, &vmdq);
- ixgbe_add_uc_addr(hw, addr, vmdq);
- }
-
- if (hw->addr_ctrl.overflow_promisc) {
- /* enable promisc if not already in overflow or set by user */
- if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
- hw_dbg(hw, " Entering address overflow promisc mode\n");
- fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- fctrl |= IXGBE_FCTRL_UPE;
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
- }
- } else {
- /* only disable if set by overflow, not by user */
- if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
- hw_dbg(hw, " Leaving address overflow promisc mode\n");
- fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- fctrl &= ~IXGBE_FCTRL_UPE;
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
- }
- }
-
- hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n");
- return 0;
-}
-
-/**
- * ixgbe_mta_vector - Determines bit-vector in multicast table to set
- * @hw: pointer to hardware structure
- * @mc_addr: the multicast address
- *
- * Extracts 12 bits from a multicast address to determine which bit-vector to
- * set in the multicast table. The hardware uses 12 bits of incoming rx
- * multicast addresses to determine the bit-vector to check in the MTA. Which
- * of the 4 possible 12-bit combinations the hardware uses is selected by the
- * MO field of MCSTCTRL, which is set during initialization to mc_filter_type.
- **/
-static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
-{
- u32 vector = 0;
-
- switch (hw->mac.mc_filter_type) {
- case 0: /* use bits [47:36] of the address */
- vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
- break;
- case 1: /* use bits [46:35] of the address */
- vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
- break;
- case 2: /* use bits [45:34] of the address */
- vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
- break;
- case 3: /* use bits [43:32] of the address */
- vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
- break;
- default: /* Invalid mc_filter_type */
- hw_dbg(hw, "MC filter type param set incorrectly\n");
- break;
- }
-
- /* vector can only be 12-bits or boundary will be exceeded */
- vector &= 0xFFF;
- return vector;
-}
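/*
 * Illustrative sketch (not from the original sources): computing the 12-bit
 * MTA hash for filter type 0 (address bits [47:36]) and locating the MTA
 * word and bit that ixgbe_set_mta flips. The MTA is modelled as 128 32-bit
 * words, i.e. 4096 bits.
 */
#include <stdint.h>
#include <stdio.h>

static void mta_lookup(const uint8_t mc_addr[6])
{
	uint32_t vector = ((mc_addr[4] >> 4) |
			   ((uint16_t)mc_addr[5] << 4)) & 0xFFF;
	uint32_t vector_reg = (vector >> 5) & 0x7F;	/* which 32-bit word */
	uint32_t vector_bit = vector & 0x1F;		/* which bit in it   */

	printf("vector = 0x%03X -> MTA[%u] bit %u\n", (unsigned int)vector,
	       (unsigned int)vector_reg, (unsigned int)vector_bit);
}

int main(void)
{
	uint8_t mcast[6] = { 0x01, 0x00, 0x5E, 0x00, 0x12, 0x34 };

	mta_lookup(mcast);
	return 0;
}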
-
-/**
- * ixgbe_set_mta - Set bit-vector in multicast table
- * @hw: pointer to hardware structure
- * @mc_addr: the multicast address
- *
- * Sets the bit-vector in the multicast table.
- **/
-void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
-{
- u32 vector;
- u32 vector_bit;
- u32 vector_reg;
-
- hw->addr_ctrl.mta_in_use++;
-
- vector = ixgbe_mta_vector(hw, mc_addr);
- hw_dbg(hw, " bit-vector = 0x%03X\n", vector);
-
- /*
- * The MTA is a register array of 128 32-bit registers. It is treated
- * like an array of 4096 bits. We want to set bit
- * BitArray[vector_value]. So we figure out what register the bit is
- * in, read it, OR in the new bit, then write back the new value. The
- * register is determined by the upper 7 bits of the vector value and
- * the bit within that register is determined by the lower 5 bits of
- * the value.
- */
- vector_reg = (vector >> 5) & 0x7F;
- vector_bit = vector & 0x1F;
- hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
-}
-
-/**
- * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses
- * @hw: pointer to hardware structure
- * @mc_addr_list: the list of new multicast addresses
- * @mc_addr_count: number of addresses
- * @next: iterator function to walk the multicast address list
- * @clear: flag, when set clears the table beforehand
- *
- * When the clear flag is set, the given list replaces any existing list.
- * Hashes the given addresses into the multicast table.
- **/
-s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count, ixgbe_mc_addr_itr next,
- bool clear)
-{
- u32 i;
- u32 vmdq;
-
- /*
- * Set the new number of MC addresses that we are being requested to
- * use.
- */
- hw->addr_ctrl.num_mc_addrs = mc_addr_count;
- hw->addr_ctrl.mta_in_use = 0;
-
- /* Clear mta_shadow */
- if (clear) {
- hw_dbg(hw, " Clearing MTA\n");
- memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
- }
-
- /* Update mta_shadow */
- for (i = 0; i < mc_addr_count; i++) {
- hw_dbg(hw, " Adding the multicast addresses:\n");
- ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq));
- }
-
- /* Enable mta */
- for (i = 0; i < hw->mac.mcft_size; i++)
- IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i,
- hw->mac.mta_shadow[i]);
-
- if (hw->addr_ctrl.mta_in_use > 0)
- IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL,
- IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
-
- hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n");
- return 0;
-}
-
-/**
- * ixgbe_enable_mc_generic - Enable multicast address in RAR
- * @hw: pointer to hardware structure
- *
- * Enables multicast address in RAR and the use of the multicast hash table.
- **/
-s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
-
- if (a->mta_in_use > 0)
- IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE |
- hw->mac.mc_filter_type);
-
- return 0;
-}
-
-/**
- * ixgbe_disable_mc_generic - Disable multicast address in RAR
- * @hw: pointer to hardware structure
- *
- * Disables multicast address in RAR and the use of the multicast hash table.
- **/
-s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
-
- if (a->mta_in_use > 0)
- IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
-
- return 0;
-}
-
-/**
- * ixgbe_fc_enable_generic - Enable flow control
- * @hw: pointer to hardware structure
- *
- * Enable flow control according to the current settings.
- **/
-s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
- u32 mflcn_reg, fccfg_reg;
- u32 reg;
- u32 fcrtl, fcrth;
- int i;
-
- /* Validate the water mark configuration */
- if (!hw->fc.pause_time) {
- ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
- goto out;
- }
-
- /* Low water mark of zero causes XOFF floods */
- for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
- if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
- hw->fc.high_water[i]) {
- if (!hw->fc.low_water[i] ||
- hw->fc.low_water[i] >= hw->fc.high_water[i]) {
- hw_dbg(hw, "Invalid water mark configuration\n");
- ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
- goto out;
- }
- }
- }
-
- /* Negotiate the fc mode to use */
- ixgbe_fc_autoneg(hw);
-
- /* Disable any previous flow control settings */
- mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
- mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE);
-
- fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
- fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY);
-
- /*
- * The possible values of fc.current_mode are:
- * 0: Flow control is completely disabled
- * 1: Rx flow control is enabled (we can receive pause frames,
- * but not send pause frames).
- * 2: Tx flow control is enabled (we can send pause frames but
- * we do not support receiving pause frames).
- * 3: Both Rx and Tx flow control (symmetric) are enabled.
- * other: Invalid.
- */
- switch (hw->fc.current_mode) {
- case ixgbe_fc_none:
- /*
- * Flow control is disabled by software override or autoneg.
- * The code below will actually disable it in the HW.
- */
- break;
- case ixgbe_fc_rx_pause:
- /*
- * Rx Flow control is enabled and Tx Flow control is
- * disabled by software override. Since there really
- * isn't a way to advertise that we are capable of RX
- * Pause ONLY, we will advertise that we support both
- * symmetric and asymmetric Rx PAUSE. Later, we will
- * disable the adapter's ability to send PAUSE frames.
- */
- mflcn_reg |= IXGBE_MFLCN_RFCE;
- break;
- case ixgbe_fc_tx_pause:
- /*
- * Tx Flow control is enabled, and Rx Flow control is
- * disabled by software override.
- */
- fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
- break;
- case ixgbe_fc_full:
- /* Flow control (both Rx and Tx) is enabled by SW override. */
- mflcn_reg |= IXGBE_MFLCN_RFCE;
- fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
- break;
- default:
- hw_dbg(hw, "Flow control param set incorrectly\n");
- ret_val = IXGBE_ERR_CONFIG;
- goto out;
- break;
- }
-
- /* Set 802.3x based flow control settings. */
- mflcn_reg |= IXGBE_MFLCN_DPF;
- IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
- IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
-
-
- /* Set up and enable Rx high/low water mark thresholds, enable XON. */
- for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
- if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
- hw->fc.high_water[i]) {
- fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
- IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
- fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
- } else {
- IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
- /*
- * In order to prevent Tx hangs when the internal Tx
- * switch is enabled we must set the high water mark
- * to the maximum FCRTH value. This allows the Tx
- * switch to function even under heavy Rx workloads.
- */
- fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32;
- }
-
- IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth);
- }
-
- /* Configure pause time (2 TCs per register) */
- reg = hw->fc.pause_time * 0x00010001;
- for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
- IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
-
- /* Configure flow control refresh threshold value */
- IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
-
-out:
- return ret_val;
-}
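/*
 * Illustrative sketch (not from the original sources): replicating one 16-bit
 * pause time into both halves of a 32-bit FCTTV-style register, as done above
 * with "hw->fc.pause_time * 0x00010001" (two traffic classes per register).
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t pause_time = 0x0680;	/* example pause time value */
	uint32_t reg = (uint32_t)pause_time * 0x00010001u;

	printf("FCTTV word = 0x%08X\n", (unsigned int)reg);	/* 0x06800680 */
	return 0;
}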
-
-/**
- * ixgbe_negotiate_fc - Negotiate flow control
- * @hw: pointer to hardware structure
- * @adv_reg: flow control advertised settings
- * @lp_reg: link partner's flow control settings
- * @adv_sym: symmetric pause bit in advertisement
- * @adv_asm: asymmetric pause bit in advertisement
- * @lp_sym: symmetric pause bit in link partner advertisement
- * @lp_asm: asymmetric pause bit in link partner advertisement
- *
- * Find the intersection between advertised settings and link partner's
- * advertised settings
- **/
-static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
- u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
-{
- if (!adv_reg || !lp_reg)
- return IXGBE_ERR_FC_NOT_NEGOTIATED;
-
- if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) {
- /*
- * Now we need to check if the user selected Rx ONLY
- * of pause frames. In this case, we had to advertise
- * FULL flow control because we could not advertise RX
- * ONLY. Hence, we must now check to see if we need to
- * turn OFF the TRANSMISSION of PAUSE frames.
- */
- if (hw->fc.requested_mode == ixgbe_fc_full) {
- hw->fc.current_mode = ixgbe_fc_full;
- hw_dbg(hw, "Flow Control = FULL.\n");
- } else {
- hw->fc.current_mode = ixgbe_fc_rx_pause;
- hw_dbg(hw, "Flow Control=RX PAUSE frames only\n");
- }
- } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) &&
- (lp_reg & lp_sym) && (lp_reg & lp_asm)) {
- hw->fc.current_mode = ixgbe_fc_tx_pause;
- hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n");
- } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) &&
- !(lp_reg & lp_sym) && (lp_reg & lp_asm)) {
- hw->fc.current_mode = ixgbe_fc_rx_pause;
- hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n");
- } else {
- hw->fc.current_mode = ixgbe_fc_none;
- hw_dbg(hw, "Flow Control = NONE.\n");
- }
- return 0;
-}
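/*
 * Illustrative sketch (not from the original sources): the pause resolution
 * logic of ixgbe_negotiate_fc over plain booleans. "requested_full" mirrors
 * the fc.requested_mode == ixgbe_fc_full check that decides between FULL and
 * RX-only when both sides advertise symmetric pause.
 */
#include <stdio.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static enum fc_mode resolve_fc(int loc_sym, int loc_asm, int lp_sym,
			       int lp_asm, int requested_full)
{
	if (loc_sym && lp_sym)
		return requested_full ? FC_FULL : FC_RX_PAUSE;
	if (!loc_sym && loc_asm && lp_sym && lp_asm)
		return FC_TX_PAUSE;
	if (loc_sym && loc_asm && !lp_sym && lp_asm)
		return FC_RX_PAUSE;
	return FC_NONE;
}

int main(void)
{
	/* Both sides advertise symmetric pause, full was requested -> 3. */
	printf("%d\n", resolve_fc(1, 0, 1, 0, 1));
	/* Local asym only, partner sym + asym -> TX pause only -> 2. */
	printf("%d\n", resolve_fc(0, 1, 1, 1, 0));
	return 0;
}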
-
-/**
- * ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber
- * @hw: pointer to hardware structure
- *
- * Enables flow control on 1 gig fiber according to the autonegotiation result.
- **/
-static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
-{
- u32 pcs_anadv_reg, pcs_lpab_reg, linkstat;
- s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
-
- /*
- * On multispeed fiber at 1g, bail out if
- * - link is up but AN did not complete, or if
- * - link is up and AN completed but timed out
- */
-
- linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
- if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) ||
- (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1))
- goto out;
-
- pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
- pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
-
- ret_val = ixgbe_negotiate_fc(hw, pcs_anadv_reg,
- pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE,
- IXGBE_PCS1GANA_ASM_PAUSE,
- IXGBE_PCS1GANA_SYM_PAUSE,
- IXGBE_PCS1GANA_ASM_PAUSE);
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37
- * @hw: pointer to hardware structure
- *
- * Enable flow control according to IEEE clause 37.
- **/
-static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
-{
- u32 links2, anlp1_reg, autoc_reg, links;
- s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
-
- /*
- * On backplane, bail out if
- * - backplane autoneg was not completed, or if
- * - we are 82599 and link partner is not AN enabled
- */
- links = IXGBE_READ_REG(hw, IXGBE_LINKS);
- if ((links & IXGBE_LINKS_KX_AN_COMP) == 0)
- goto out;
-
- if (hw->mac.type == ixgbe_mac_82599EB) {
- links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2);
- if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0)
- goto out;
- }
- /*
- * Read the 10g AN autoc and LP ability registers and resolve
- * local flow control settings accordingly
- */
- autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1);
-
- ret_val = ixgbe_negotiate_fc(hw, autoc_reg,
- anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE,
- IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE);
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37
- * @hw: pointer to hardware structure
- *
- * Enable flow control according to IEEE clause 37.
- **/
-static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
-{
- u16 technology_ability_reg = 0;
- u16 lp_technology_ability_reg = 0;
-
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &technology_ability_reg);
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &lp_technology_ability_reg);
-
- return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg,
- (u32)lp_technology_ability_reg,
- IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE,
- IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE);
-}
-
-/**
- * ixgbe_fc_autoneg - Configure flow control
- * @hw: pointer to hardware structure
- *
- * Compares our advertised flow control capabilities to those advertised by
- * our link partner, and determines the proper flow control mode to use.
- **/
-void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
-{
- s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
- ixgbe_link_speed speed;
- bool link_up;
-
- /*
- * AN should have completed when the cable was plugged in.
- * Look for reasons to bail out. Bail out if:
- * - FC autoneg is disabled, or if
- * - link is not up.
- */
- if (hw->fc.disable_fc_autoneg)
- goto out;
-
- hw->mac.ops.check_link(hw, &speed, &link_up, false);
- if (!link_up)
- goto out;
-
- switch (hw->phy.media_type) {
- /* Autoneg flow control on fiber adapters */
- case ixgbe_media_type_fiber:
- if (speed == IXGBE_LINK_SPEED_1GB_FULL)
- ret_val = ixgbe_fc_autoneg_fiber(hw);
- break;
-
- /* Autoneg flow control on backplane adapters */
- case ixgbe_media_type_backplane:
- ret_val = ixgbe_fc_autoneg_backplane(hw);
- break;
-
- /* Autoneg flow control on copper adapters */
- case ixgbe_media_type_copper:
- if (ixgbe_device_supports_autoneg_fc(hw) == 0)
- ret_val = ixgbe_fc_autoneg_copper(hw);
- break;
-
- default:
- break;
- }
-
-out:
- if (ret_val == 0) {
- hw->fc.fc_was_autonegged = true;
- } else {
- hw->fc.fc_was_autonegged = false;
- hw->fc.current_mode = hw->fc.requested_mode;
- }
-}
-
-/**
- * ixgbe_disable_pcie_master - Disable PCI-express master access
- * @hw: pointer to hardware structure
- *
- * Disables PCI-Express master access and verifies there are no pending
- * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable
- * bit hasn't caused the master requests to be disabled, else 0
- * is returned signifying master requests disabled.
- **/
-s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u32 i;
-
- /* Always set this bit to ensure any future transactions are blocked */
- IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
-
- /* Exit if master requests are blocked */
- if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
- goto out;
-
- /* Poll for master request bit to clear */
- for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
- udelay(100);
- if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
- goto out;
- }
-
- /*
- * Two consecutive resets are required via CTRL.RST per datasheet
- * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine
- * of this need. The first reset prevents new master requests from
- * being issued by our device. We then must wait 1usec or more for any
- * remaining completions from the PCIe bus to trickle in, and then reset
- * again to clear out any effects they may have had on our device.
- */
- hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n");
- hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
-
- /*
- * Before proceeding, make sure that the PCIe block does not have
- * transactions pending.
- */
- for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
- udelay(100);
- if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) &
- IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
- goto out;
- }
-
- hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n");
- status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
-
-out:
- return status;
-}
-
-/**
- * ixgbe_acquire_swfw_sync - Acquire SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to acquire
- *
- * Acquires the SWFW semaphore through the GSSR register for the specified
- * function (CSR, PHY0, PHY1, EEPROM, Flash)
- **/
-s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask)
-{
- u32 gssr;
- u32 swmask = mask;
- u32 fwmask = mask << 5;
- s32 timeout = 200;
-
- while (timeout) {
- /*
- * SW EEPROM semaphore bit is used for access to all
- * SW_FW_SYNC/GSSR bits (not just EEPROM)
- */
- if (ixgbe_get_eeprom_semaphore(hw))
- return IXGBE_ERR_SWFW_SYNC;
-
- gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
- if (!(gssr & (fwmask | swmask)))
- break;
-
- /*
- * Firmware currently using resource (fwmask) or other software
- * thread currently using resource (swmask)
- */
- ixgbe_release_eeprom_semaphore(hw);
- msleep(5);
- timeout--;
- }
-
- if (!timeout) {
- hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n");
- return IXGBE_ERR_SWFW_SYNC;
- }
-
- gssr |= swmask;
- IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
-
- ixgbe_release_eeprom_semaphore(hw);
- return 0;
-}
-
-/**
- * ixgbe_release_swfw_sync - Release SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to release
- *
- * Releases the SWFW semaphore through the GSSR register for the specified
- * function (CSR, PHY0, PHY1, EEPROM, Flash)
- **/
-void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask)
-{
- u32 gssr;
- u32 swmask = mask;
-
- ixgbe_get_eeprom_semaphore(hw);
-
- gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
- gssr &= ~swmask;
- IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
-
- ixgbe_release_eeprom_semaphore(hw);
-}
-
-/**
- * ixgbe_disable_sec_rx_path_generic - Stops the receive data path
- * @hw: pointer to hardware structure
- *
- * Stops the receive data path and waits for the HW to internally empty
- * the Rx security block
- **/
-s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw)
-{
-#define IXGBE_MAX_SECRX_POLL 40
-
- int i;
- int secrxreg;
-
- secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
- secrxreg |= IXGBE_SECRXCTRL_RX_DIS;
- IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
- for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) {
- secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
- if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY)
- break;
- else
- /* Use interrupt-safe sleep just in case */
- udelay(1000);
- }
-
- /* For informational purposes only */
- if (i >= IXGBE_MAX_SECRX_POLL)
- hw_dbg(hw, "Rx unit being enabled before security "
- "path fully disabled. Continuing with init.\n");
-
- return 0;
-}
-
-/**
- * ixgbe_enable_sec_rx_path_generic - Enables the receive data path
- * @hw: pointer to hardware structure
- *
- * Enables the receive data path.
- **/
-s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw)
-{
- int secrxreg;
-
- secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
- secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS;
- IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit
- * @hw: pointer to hardware structure
- * @regval: register value to write to RXCTRL
- *
- * Enables the Rx DMA unit
- **/
-s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
-{
- IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
-
- return 0;
-}
-
-/**
- * ixgbe_blink_led_start_generic - Blink LED based on index.
- * @hw: pointer to hardware structure
- * @index: led number to blink
- **/
-s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
-{
- ixgbe_link_speed speed = 0;
- bool link_up = 0;
- u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
-
- /*
- * Link must be up to auto-blink the LEDs;
- * Force it if link is down.
- */
- hw->mac.ops.check_link(hw, &speed, &link_up, false);
-
- if (!link_up) {
- autoc_reg |= IXGBE_AUTOC_AN_RESTART;
- autoc_reg |= IXGBE_AUTOC_FLU;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
- IXGBE_WRITE_FLUSH(hw);
- msleep(10);
- }
-
- led_reg &= ~IXGBE_LED_MODE_MASK(index);
- led_reg |= IXGBE_LED_BLINK(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_blink_led_stop_generic - Stop blinking LED based on index.
- * @hw: pointer to hardware structure
- * @index: led number to stop blinking
- **/
-s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
-{
- u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
-
- autoc_reg &= ~IXGBE_AUTOC_FLU;
- autoc_reg |= IXGBE_AUTOC_AN_RESTART;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
-
- led_reg &= ~IXGBE_LED_MODE_MASK(index);
- led_reg &= ~IXGBE_LED_BLINK(index);
- led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM
- * @hw: pointer to hardware structure
- * @san_mac_offset: SAN MAC address offset
- *
- * This function reads the EEPROM location for the SAN MAC address
- * pointer and returns the value at that location. It is used by both
- * the get and set mac_addr routines.
- **/
-static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
- u16 *san_mac_offset)
-{
- /*
- * First read the EEPROM pointer to see if the MAC addresses are
- * available.
- */
- hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset);
-
- return 0;
-}
-
-/**
- * ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM
- * @hw: pointer to hardware structure
- * @san_mac_addr: SAN MAC address
- *
- * Reads the SAN MAC address from the EEPROM, if it's available. This is
- * per-port, so set_lan_id() must be called before reading the addresses.
- * set_lan_id() is called by identify_sfp(), but this cannot be relied
- * upon for non-SFP connections, so we must call it here.
- **/
-s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
-{
- u16 san_mac_data, san_mac_offset;
- u8 i;
-
- /*
- * First read the EEPROM pointer to see if the MAC addresses are
- * available. If they're not, no point in calling set_lan_id() here.
- */
- ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
-
- if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
- /*
- * No addresses available in this EEPROM. It's not an
- * error though, so just wipe the local address and return.
- */
- for (i = 0; i < 6; i++)
- san_mac_addr[i] = 0xFF;
-
- goto san_mac_addr_out;
- }
-
- /* make sure we know which port we need to program */
- hw->mac.ops.set_lan_id(hw);
- /* apply the port offset to the address offset */
- (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
- (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
- for (i = 0; i < 3; i++) {
- hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data);
- san_mac_addr[i * 2] = (u8)(san_mac_data);
- san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8);
- san_mac_offset++;
- }
-
-san_mac_addr_out:
- return 0;
-}
-
-/**
- * ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM
- * @hw: pointer to hardware structure
- * @san_mac_addr: SAN MAC address
- *
- * Write a SAN MAC address to the EEPROM.
- **/
-s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
-{
- s32 status = 0;
- u16 san_mac_data, san_mac_offset;
- u8 i;
-
- /* Look for SAN mac address pointer. If not defined, return */
- ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
-
- if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
- status = IXGBE_ERR_NO_SAN_ADDR_PTR;
- goto san_mac_addr_out;
- }
-
- /* Make sure we know which port we need to write */
- hw->mac.ops.set_lan_id(hw);
- /* Apply the port offset to the address offset */
- (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
- (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
-
- for (i = 0; i < 3; i++) {
- san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8);
- san_mac_data |= (u16)(san_mac_addr[i * 2]);
- hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data);
- san_mac_offset++;
- }
-
-san_mac_addr_out:
- return status;
-}
-
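The two SAN MAC routines above store a 6-byte address in three consecutive 16-bit EEPROM words, low byte first. The following standalone sketch is editor-added illustration, not driver code; the example address is made up. It shows that the pack/unpack logic round-trips:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x1B, 0x21, 0xAA, 0xBB, 0xCC };
	uint16_t words[3];
	uint8_t out[6];
	int i;

	/* pack: mirrors ixgbe_set_san_mac_addr_generic() above */
	for (i = 0; i < 3; i++)
		words[i] = (uint16_t)mac[i * 2] |
			   (uint16_t)(mac[i * 2 + 1] << 8);

	/* unpack: mirrors ixgbe_get_san_mac_addr_generic() above */
	for (i = 0; i < 3; i++) {
		out[i * 2] = (uint8_t)words[i];
		out[i * 2 + 1] = (uint8_t)(words[i] >> 8);
	}

	for (i = 0; i < 6; i++)
		printf("%02X%c", (unsigned)out[i], i < 5 ? ':' : '\n');
	return 0;
}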
-/**
- * ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count
- * @hw: pointer to hardware structure
- *
- * Read PCIe configuration space, and get the MSI-X vector count from
- * the capabilities table.
- **/
-u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
-{
- u16 msix_count = 1;
- u16 max_msix_count;
- u16 pcie_offset;
-
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS;
- max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598;
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
- max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
- break;
- default:
- return msix_count;
- }
-
- msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset);
- msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK;
-
- /* MSI-X count is zero-based in HW */
- msix_count++;
-
- if (msix_count > max_msix_count)
- msix_count = max_msix_count;
-
- return msix_count;
-}
-
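The MSI-X table-size field read above is zero-based, so the driver adds one and then clamps to the per-MAC maximum. A standalone sketch of that arithmetic follows (editor-added; the 0x7FF mask and the maximum of 64 are illustrative assumptions, not values copied from the driver headers):

#include <stdint.h>
#include <stdio.h>

/* Assumed table-size mask; the driver uses its own register definition. */
static uint16_t msix_vectors(uint16_t raw_field, uint16_t max_vectors)
{
	uint16_t count = (raw_field & 0x7FF) + 1;	/* field is zero-based */

	return count > max_vectors ? max_vectors : count;
}

int main(void)
{
	printf("%u\n", (unsigned)msix_vectors(63, 64));		/* raw 63 -> 64 vectors */
	printf("%u\n", (unsigned)msix_vectors(127, 64));	/* clamped to the MAC max */
	return 0;
}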
-/**
- * ixgbe_insert_mac_addr_generic - Find a RAR for this mac address
- * @hw: pointer to hardware structure
- * @addr: Address to put into receive address register
- * @vmdq: VMDq pool to assign
- *
- * Puts an Ethernet address into a receive address register, or finds
- * the RAR it is already in, and adds the address to the pool list.
- **/
-s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
-{
- static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF;
- u32 first_empty_rar = NO_EMPTY_RAR_FOUND;
- u32 rar;
- u32 rar_low, rar_high;
- u32 addr_low, addr_high;
-
- /* swap bytes for HW little endian */
- addr_low = addr[0] | (addr[1] << 8)
- | (addr[2] << 16)
- | (addr[3] << 24);
- addr_high = addr[4] | (addr[5] << 8);
-
- /*
- * Either find the mac_id in rar or find the first empty space.
- * rar_highwater points to just after the highest currently used
- * rar in order to shorten the search. It grows when we add a new
- * rar to the top.
- */
- for (rar = 0; rar < hw->mac.rar_highwater; rar++) {
- rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
-
- if (((IXGBE_RAH_AV & rar_high) == 0)
- && first_empty_rar == NO_EMPTY_RAR_FOUND) {
- first_empty_rar = rar;
- } else if ((rar_high & 0xFFFF) == addr_high) {
- rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar));
- if (rar_low == addr_low)
- break; /* found it already in the rars */
- }
- }
-
- if (rar < hw->mac.rar_highwater) {
- /* already there so just add to the pool bits */
- ixgbe_set_vmdq(hw, rar, vmdq);
- } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) {
- /* stick it into first empty RAR slot we found */
- rar = first_empty_rar;
- ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
- } else if (rar == hw->mac.rar_highwater) {
- /* add it to the top of the list and inc the highwater mark */
- ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
- hw->mac.rar_highwater++;
- } else if (rar >= hw->mac.num_rar_entries) {
- return IXGBE_ERR_INVALID_MAC_ADDR;
- }
-
- /*
- * If we found rar[0], make sure the default pool bit (we use pool 0)
- * remains cleared to be sure default pool packets will get delivered
- */
- if (rar == 0)
- ixgbe_clear_vmdq(hw, rar, 0);
-
- return rar;
-}
-
-/**
- * ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address
- * @hw: pointer to hardware struct
- * @rar: receive address register index to disassociate
- * @vmdq: VMDq pool index to remove from the rar
- **/
-s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- u32 mpsar_lo, mpsar_hi;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /* Make sure we are using a valid rar index range */
- if (rar >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", rar);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
- mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
-
- if (!mpsar_lo && !mpsar_hi)
- goto done;
-
- if (vmdq == IXGBE_CLEAR_VMDQ_ALL) {
- if (mpsar_lo) {
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
- mpsar_lo = 0;
- }
- if (mpsar_hi) {
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
- mpsar_hi = 0;
- }
- } else if (vmdq < 32) {
- mpsar_lo &= ~(1 << vmdq);
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo);
- } else {
- mpsar_hi &= ~(1 << (vmdq - 32));
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi);
- }
-
- /* was that the last pool using this rar? */
- if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
- hw->mac.ops.clear_rar(hw, rar);
-done:
- return 0;
-}
-
-/**
- * ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address
- * @hw: pointer to hardware struct
- * @rar: receive address register index to associate with a VMDq index
- * @vmdq: VMDq pool index
- **/
-s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
-{
- u32 mpsar;
- u32 rar_entries = hw->mac.num_rar_entries;
-
- /* Make sure we are using a valid rar index range */
- if (rar >= rar_entries) {
- hw_dbg(hw, "RAR index %d is out of range.\n", rar);
- return IXGBE_ERR_INVALID_ARGUMENT;
- }
-
- if (vmdq < 32) {
- mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
- mpsar |= 1 << vmdq;
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
- } else {
- mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
- mpsar |= 1 << (vmdq - 32);
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
- }
- return 0;
-}
-
-/**
- * ixgbe_set_vmdq_san_mac_generic - Associate default VMDq pool index with
- * a rx address
- * @hw: pointer to hardware struct
- * @vmdq: VMDq pool index
- *
- * This function should only be invoked in IOV mode. In IOV mode the
- * default pool is the next pool after the number of VFs advertised,
- * not pool 0. The MPSAR table needs to be updated for the SAN_MAC RAR
- * [hw->mac.san_mac_rar_index].
- **/
-s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
-{
- u32 mpsar;
- u32 rar = hw->mac.san_mac_rar_index;
-
- if (vmdq < 32) {
- mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
- mpsar |= 1 << vmdq;
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
- } else {
- mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
- mpsar |= 1 << (vmdq - 32);
- IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
-{
- int i;
-
- hw_dbg(hw, " Clearing UTA\n");
-
- for (i = 0; i < 128; i++)
- IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0);
-
- return 0;
-}
-
-/**
- * ixgbe_find_vlvf_slot - find the vlanid or the first empty slot
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- *
- * Returns the VLVF index where this VLAN id should be placed.
- **/
-s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan)
-{
- u32 bits = 0;
- u32 first_empty_slot = 0;
- s32 regindex;
-
- /* short cut the special case */
- if (vlan == 0)
- return 0;
-
- /*
- * Search for the vlan id in the VLVF entries. Save off the first empty
- * slot found along the way
- */
- for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) {
- bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
- if (!bits && !(first_empty_slot))
- first_empty_slot = regindex;
- else if ((bits & 0x0FFF) == vlan)
- break;
- }
-
- /*
- * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan
- * in the VLVF. Else use the first empty VLVF register for this
- * vlan id.
- */
- if (regindex >= IXGBE_VLVF_ENTRIES) {
- if (first_empty_slot)
- regindex = first_empty_slot;
- else {
- hw_dbg(hw, "No space in VLVF.\n");
- regindex = IXGBE_ERR_NO_SPACE;
- }
- }
-
- return regindex;
-}
-
-/**
- * ixgbe_set_vfta_generic - Set VLAN filter table
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- * @vind: VMDq output index that maps queue to VLAN id in VLVFB
- * @vlan_on: boolean flag to turn on/off VLAN in VFTA
- *
- * Turn on/off specified VLAN in the VLAN filter table.
- **/
-s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
- bool vlan_on)
-{
- s32 regindex;
- u32 bitindex;
- u32 vfta;
- u32 targetbit;
- s32 ret_val = 0;
- bool vfta_changed = false;
-
- if (vlan > 4095)
- return IXGBE_ERR_PARAM;
-
- /*
- * this is a 2 part operation - first the VFTA, then the
- * VLVF and VLVFB if VT Mode is set
- * We don't write the VFTA until we know the VLVF part succeeded.
- */
-
- /* Part 1
- * The VFTA is a bitstring made up of 128 32-bit registers
- * that enable the particular VLAN id, much like the MTA:
- * bits[11-5]: which register
- * bits[4-0]: which bit in the register
- */
- regindex = (vlan >> 5) & 0x7F;
- bitindex = vlan & 0x1F;
- targetbit = (1 << bitindex);
- vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
-
- if (vlan_on) {
- if (!(vfta & targetbit)) {
- vfta |= targetbit;
- vfta_changed = true;
- }
- } else {
- if (vfta & targetbit) {
- vfta &= ~targetbit;
- vfta_changed = true;
- }
- }
-
- /* Part 2
- * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF
- */
- ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on,
- &vfta_changed);
- if (ret_val != 0)
- return ret_val;
-
- if (vfta_changed)
- IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta);
-
- return 0;
-}
-
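The VFTA indexing used above spreads the 4096 possible VLAN IDs across 128 32-bit registers: bits 11..5 of the VLAN ID select the register, bits 4..0 select the bit. A minimal standalone sketch (editor-added, with VLAN 100 as an arbitrary example) works through the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int vlan = 100;
	unsigned int regindex = (vlan >> 5) & 0x7F;	/* 100 / 32 = 3 */
	unsigned int bitindex = vlan & 0x1F;		/* 100 % 32 = 4 */

	printf("vlan %u -> VFTA[%u] bit %u (mask 0x%08X)\n",
	       vlan, regindex, bitindex, 1u << bitindex);
	return 0;
}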
-/**
- * ixgbe_set_vlvf_generic - Set VLAN Pool Filter
- * @hw: pointer to hardware structure
- * @vlan: VLAN id to write to VLAN filter
- * @vind: VMDq output index that maps queue to VLAN id in VLVFB
- * @vlan_on: boolean flag to turn on/off VLAN in VLVF
- * @vfta_changed: pointer to boolean flag which indicates whether VFTA
- * should be changed
- *
- * Turn on/off specified bit in VLVF table.
- **/
-s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
- bool vlan_on, bool *vfta_changed)
-{
- u32 vt;
-
- if (vlan > 4095)
- return IXGBE_ERR_PARAM;
-
- /* If VT Mode is set
- * Either vlan_on
- * make sure the vlan is in VLVF
- * set the vind bit in the matching VLVFB
- * Or !vlan_on
- * clear the pool bit and possibly the vind
- */
- vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
- if (vt & IXGBE_VT_CTL_VT_ENABLE) {
- s32 vlvf_index;
- u32 bits;
-
- vlvf_index = ixgbe_find_vlvf_slot(hw, vlan);
- if (vlvf_index < 0)
- return vlvf_index;
-
- if (vlan_on) {
- /* set the pool bit */
- if (vind < 32) {
- bits = IXGBE_READ_REG(hw,
- IXGBE_VLVFB(vlvf_index * 2));
- bits |= (1 << vind);
- IXGBE_WRITE_REG(hw,
- IXGBE_VLVFB(vlvf_index * 2),
- bits);
- } else {
- bits = IXGBE_READ_REG(hw,
- IXGBE_VLVFB((vlvf_index * 2) + 1));
- bits |= (1 << (vind - 32));
- IXGBE_WRITE_REG(hw,
- IXGBE_VLVFB((vlvf_index * 2) + 1),
- bits);
- }
- } else {
- /* clear the pool bit */
- if (vind < 32) {
- bits = IXGBE_READ_REG(hw,
- IXGBE_VLVFB(vlvf_index * 2));
- bits &= ~(1 << vind);
- IXGBE_WRITE_REG(hw,
- IXGBE_VLVFB(vlvf_index * 2),
- bits);
- bits |= IXGBE_READ_REG(hw,
- IXGBE_VLVFB((vlvf_index * 2) + 1));
- } else {
- bits = IXGBE_READ_REG(hw,
- IXGBE_VLVFB((vlvf_index * 2) + 1));
- bits &= ~(1 << (vind - 32));
- IXGBE_WRITE_REG(hw,
- IXGBE_VLVFB((vlvf_index * 2) + 1),
- bits);
- bits |= IXGBE_READ_REG(hw,
- IXGBE_VLVFB(vlvf_index * 2));
- }
- }
-
- /*
- * If there are still bits set in the VLVFB registers
- * for the VLAN ID indicated we need to see if the
- * caller is requesting that we clear the VFTA entry bit.
- * If the caller has requested that we clear the VFTA
- * entry bit but there are still pools/VFs using this VLAN
- * ID entry then ignore the request. We're not worried
- * about the case where we're turning the VFTA VLAN ID
- * entry bit on, only when requested to turn it off as
- * there may be multiple pools and/or VFs using the
- * VLAN ID entry. In that case we cannot clear the
- * VFTA bit until all pools/VFs using that VLAN ID have also
- * been cleared. This will be indicated by "bits" being
- * zero.
- */
- if (bits) {
- IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index),
- (IXGBE_VLVF_VIEN | vlan));
- if (!vlan_on && (vfta_changed != NULL)) {
- /* someone wants to clear the vfta entry
- * but some pools/VFs are still using it.
- * Ignore it. */
- *vfta_changed = false;
- }
- } else
- IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_clear_vfta_generic - Clear VLAN filter table
- * @hw: pointer to hardware structure
- *
- * Clears the VLAN filter table, and the VMDq index associated with the filter
- **/
-s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
-{
- u32 offset;
-
- for (offset = 0; offset < hw->mac.vft_size; offset++)
- IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
-
- for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) {
- IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0);
- IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0);
- IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_check_mac_link_generic - Determine link and speed status
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @link_up: true when link is up
- * @link_up_wait_to_complete: bool used to wait for link up or not
- *
- * Reads the links register to determine if link is up and the current speed
- **/
-s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *link_up, bool link_up_wait_to_complete)
-{
- u32 links_reg, links_orig;
- u32 i;
-
- /* clear the old state */
- links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS);
-
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
-
- if (links_orig != links_reg) {
- hw_dbg(hw, "LINKS changed from %08X to %08X\n",
- links_orig, links_reg);
- }
-
- if (link_up_wait_to_complete) {
- for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
- if (links_reg & IXGBE_LINKS_UP) {
- *link_up = true;
- break;
- } else {
- *link_up = false;
- }
- msleep(100);
- links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
- }
- } else {
- if (links_reg & IXGBE_LINKS_UP)
- *link_up = true;
- else
- *link_up = false;
- }
-
- if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
- IXGBE_LINKS_SPEED_10G_82599)
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
- else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
- IXGBE_LINKS_SPEED_1G_82599)
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
- IXGBE_LINKS_SPEED_100_82599)
- *speed = IXGBE_LINK_SPEED_100_FULL;
- else
- *speed = IXGBE_LINK_SPEED_UNKNOWN;
-
- return 0;
-}
-
-/**
- * ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from the EEPROM
- * @hw: pointer to hardware structure
- * @wwnn_prefix: the alternative WWNN prefix
- * @wwpn_prefix: the alternative WWPN prefix
- *
- * This function reads the alternative SAN MAC address block in the EEPROM
- * to check for support of the alternative WWNN/WWPN prefixes.
- **/
-s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
- u16 *wwpn_prefix)
-{
- u16 offset, caps;
- u16 alt_san_mac_blk_offset;
-
- /* clear output first */
- *wwnn_prefix = 0xFFFF;
- *wwpn_prefix = 0xFFFF;
-
- /* check if alternative SAN MAC is supported */
- hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR,
- &alt_san_mac_blk_offset);
-
- if ((alt_san_mac_blk_offset == 0) ||
- (alt_san_mac_blk_offset == 0xFFFF))
- goto wwn_prefix_out;
-
- /* check capability in alternative san mac address block */
- offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET;
- hw->eeprom.ops.read(hw, offset, &caps);
- if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN))
- goto wwn_prefix_out;
-
- /* get the corresponding prefix for WWNN/WWPN */
- offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET;
- hw->eeprom.ops.read(hw, offset, wwnn_prefix);
-
- offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET;
- hw->eeprom.ops.read(hw, offset, wwpn_prefix);
-
-wwn_prefix_out:
- return 0;
-}
-
-/**
- * ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM
- * @hw: pointer to hardware structure
- * @bs: the fcoe boot status
- *
- * This function will read the FCOE boot status from the iSCSI FCOE block
- **/
-s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs)
-{
- u16 offset, caps, flags;
- s32 status;
-
- /* clear output first */
- *bs = ixgbe_fcoe_bootstatus_unavailable;
-
- /* check if FCOE IBA block is present */
- offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR;
- status = hw->eeprom.ops.read(hw, offset, &caps);
- if (status != 0)
- goto out;
-
- if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE))
- goto out;
-
- /* check if iSCSI FCOE block is populated */
- status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset);
- if (status != 0)
- goto out;
-
- if ((offset == 0) || (offset == 0xFFFF))
- goto out;
-
- /* read fcoe flags in iSCSI FCOE block */
- offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET;
- status = hw->eeprom.ops.read(hw, offset, &flags);
- if (status != 0)
- goto out;
-
- if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE)
- *bs = ixgbe_fcoe_bootstatus_enabled;
- else
- *bs = ixgbe_fcoe_bootstatus_disabled;
-
-out:
- return status;
-}
-
-/**
- * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing
- * @hw: pointer to hardware structure
- * @enable: enable or disable switch for anti-spoofing
- * @pf: Physical Function pool - do not enable anti-spoofing for the PF
- *
- **/
-void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf)
-{
- int j;
- int pf_target_reg = pf >> 3;
- int pf_target_shift = pf % 8;
- u32 pfvfspoof = 0;
-
- if (hw->mac.type == ixgbe_mac_82598EB)
- return;
-
- if (enable)
- pfvfspoof = IXGBE_SPOOF_MACAS_MASK;
-
- /*
- * PFVFSPOOF register array is size 8 with 8 bits assigned to
- * MAC anti-spoof enables in each register array element.
- */
- for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++)
- IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof);
-
- /* If not enabling anti-spoofing then done */
- if (!enable)
- return;
-
- /*
- * The PF should be allowed to spoof so that it can support
- * emulation mode NICs. Reset the bit assigned to the PF
- */
- pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg));
- pfvfspoof ^= (1 << pf_target_shift);
- IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof);
-}
-
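The anti-spoofing helpers above map a pool number onto the PFVFSPOOF register array: each register carries 8 MAC anti-spoof enable bits, so pool N lives in register N / 8 at bit N % 8, with the VLAN enables sitting IXGBE_SPOOF_VLANAS_SHIFT bits higher in the same register. A standalone sketch follows (editor-added; the shift of 8 used for the VLAN column is an assumption for illustration):

#include <stdio.h>

int main(void)
{
	int pool;

	for (pool = 0; pool < 24; pool += 5) {
		int reg = pool >> 3;	/* which PFVFSPOOF register */
		int bit = pool % 8;	/* which MAC anti-spoof bit */

		printf("pool %2d -> PFVFSPOOF(%d), MAC bit %d, VLAN bit %d\n",
		       pool, reg, bit, bit + 8);
	}
	return 0;
}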
-/**
- * ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing
- * @hw: pointer to hardware structure
- * @enable: enable or disable switch for VLAN anti-spoofing
- * @vf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing
- *
- **/
-void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
-{
- int vf_target_reg = vf >> 3;
- int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT;
- u32 pfvfspoof;
-
- if (hw->mac.type == ixgbe_mac_82598EB)
- return;
-
- pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
- if (enable)
- pfvfspoof |= (1 << vf_target_shift);
- else
- pfvfspoof &= ~(1 << vf_target_shift);
- IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
-}
-
-/**
- * ixgbe_get_device_caps_generic - Get additional device capabilities
- * @hw: pointer to hardware structure
- * @device_caps: the EEPROM word with the extra device capabilities
- *
- * This function will read the EEPROM location for the device capabilities,
- * and return the word through device_caps.
- **/
-s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
-{
- hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps);
-
- return 0;
-}
-
-/**
- * ixgbe_calculate_checksum - Calculate checksum for buffer
- * @buffer: pointer to the buffer to checksum
- * @length: length of the buffer, in bytes
- *
- * Calculates the checksum of the buffer over the specified length and
- * returns the result.
- **/
-static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
-{
- u32 i;
- u8 sum = 0;
-
- if (!buffer)
- return 0;
- for (i = 0; i < length; i++)
- sum += buffer[i];
-
- return (u8) (0 - sum);
-}
-
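The checksum above is chosen so that the byte sum of the buffer plus the checksum byte is 0 modulo 256, which is how the manageability firmware verifies a command header. A standalone sketch (editor-added, with made-up payload bytes) demonstrates the property:

#include <stdint.h>
#include <stdio.h>

static uint8_t calc_checksum(const uint8_t *buf, uint32_t len)
{
	uint8_t sum = 0;
	uint32_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];
	return (uint8_t)(0 - sum);
}

int main(void)
{
	uint8_t msg[4] = { 0x10, 0x20, 0xF3, 0x00 };
	uint8_t total = 0;
	int i;

	msg[3] = calc_checksum(msg, 3);		/* fill the checksum byte */
	for (i = 0; i < 4; i++)
		total += msg[i];
	/* byte sum over payload + checksum is 0 */
	printf("checksum=0x%02X, byte sum=0x%02X\n",
	       (unsigned)msg[3], (unsigned)total);
	return 0;
}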
-/**
- * ixgbe_host_interface_command - Issue command to manageability block
- * @hw: pointer to the HW structure
- * @buffer: contains the command to write and where the return status will
- * be placed
- * @length: length of buffer, must be multiple of 4 bytes
- *
- * Communicates with the manageability block. Returns 0 on success,
- * or IXGBE_ERR_HOST_INTERFACE_COMMAND on failure.
- **/
-static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
- u32 length)
-{
- u32 hicr, i, bi;
- u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
- u8 buf_len, dword_len;
-
- s32 ret_val = 0;
-
- if (length == 0 || length & 0x3 ||
- length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
- hw_dbg(hw, "Buffer length failure.\n");
- ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
- goto out;
- }
-
- /* Check that the host interface is enabled. */
- hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
- if ((hicr & IXGBE_HICR_EN) == 0) {
- hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
- ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
- goto out;
- }
-
- /* Calculate length in DWORDs */
- dword_len = length >> 2;
-
- /*
- * The device driver writes the relevant command block
- * into the ram area.
- */
- for (i = 0; i < dword_len; i++)
- IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
- i, IXGBE_CPU_TO_LE32(buffer[i]));
-
- /* Setting this bit tells the ARC that a new command is pending. */
- IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
-
- for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) {
- hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
- if (!(hicr & IXGBE_HICR_C))
- break;
- msleep(1);
- }
-
- /* Check command successful completion. */
- if (i == IXGBE_HI_COMMAND_TIMEOUT ||
- (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) {
- hw_dbg(hw, "Command has failed with no status valid.\n");
- ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
- goto out;
- }
-
- /* Calculate length in DWORDs */
- dword_len = hdr_size >> 2;
-
- /* first pull in the header so we know the buffer length */
- for (bi = 0; bi < dword_len; bi++) {
- buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
- IXGBE_LE32_TO_CPUS(&buffer[bi]);
- }
-
- /* If there is anything in the data position, pull it in */
- buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
- if (buf_len == 0)
- goto out;
-
- if (length < (buf_len + hdr_size)) {
- hw_dbg(hw, "Buffer not large enough for reply message.\n");
- ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
- goto out;
- }
-
- /* Calculate length in DWORDs, add 3 for odd lengths */
- dword_len = (buf_len + 3) >> 2;
-
- /* Pull in the rest of the buffer (bi is where we left off)*/
- for (; bi <= dword_len; bi++) {
- buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
- IXGBE_LE32_TO_CPUS(&buffer[bi]);
- }
-
-out:
- return ret_val;
-}
-
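Command lengths passed to the routine above must be a multiple of 4, so length >> 2 converts exactly to DWORDs, while reply payloads of arbitrary byte length are rounded up with (buf_len + 3) >> 2. A small standalone sketch of that rounding (editor-added):

#include <stdio.h>

int main(void)
{
	unsigned int buf_len;

	/* round a byte count up to whole DWORDs */
	for (buf_len = 0; buf_len <= 9; buf_len++)
		printf("buf_len %u -> %u DWORDs\n", buf_len, (buf_len + 3) >> 2);
	return 0;
}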
-/**
- * ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware
- * @hw: pointer to the HW structure
- * @maj: driver version major number
- * @min: driver version minor number
- * @build: driver version build number
- * @sub: driver version sub build number
- *
- * Sends the driver version number to firmware through the manageability
- * block. Returns 0 on success, IXGBE_ERR_SWFW_SYNC if the semaphore
- * cannot be acquired, or IXGBE_ERR_HOST_INTERFACE_COMMAND if the
- * command fails.
- **/
-s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
- u8 build, u8 sub)
-{
- struct ixgbe_hic_drv_info fw_cmd;
- int i;
- s32 ret_val = 0;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM)
- != 0) {
- ret_val = IXGBE_ERR_SWFW_SYNC;
- goto out;
- }
-
- fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
- fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
- fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
- fw_cmd.port_num = (u8)hw->bus.func;
- fw_cmd.ver_maj = maj;
- fw_cmd.ver_min = min;
- fw_cmd.ver_build = build;
- fw_cmd.ver_sub = sub;
- fw_cmd.hdr.checksum = 0;
- fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
- (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
- fw_cmd.pad = 0;
- fw_cmd.pad2 = 0;
-
- for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
- ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
- sizeof(fw_cmd));
- if (ret_val != 0)
- continue;
-
- if (fw_cmd.hdr.cmd_or_resp.ret_status ==
- FW_CEM_RESP_STATUS_SUCCESS)
- ret_val = 0;
- else
- ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
-
- break;
- }
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_set_rxpba_generic - Initialize Rx packet buffer
- * @hw: pointer to hardware structure
- * @num_pb: number of packet buffers to allocate
- * @headroom: reserve n KB of headroom
- * @strategy: packet buffer allocation strategy
- **/
-void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
- int strategy)
-{
- u32 pbsize = hw->mac.rx_pb_size;
- int i = 0;
- u32 rxpktsize, txpktsize, txpbthresh;
-
- /* Reserve headroom */
- pbsize -= headroom;
-
- if (!num_pb)
- num_pb = 1;
-
- /* Divide remaining packet buffer space amongst the number of packet
- * buffers requested using supplied strategy.
- */
- switch (strategy) {
- case PBA_STRATEGY_WEIGHTED:
- /* The ixgbe_dcb_pba_80_48 strategy weights the first half of the
- * packet buffers with 5/8 of the packet buffer space.
- */
- rxpktsize = (pbsize * 5) / (num_pb * 4);
- pbsize -= rxpktsize * (num_pb / 2);
- rxpktsize <<= IXGBE_RXPBSIZE_SHIFT;
- for (; i < (num_pb / 2); i++)
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
- /* Fall through to configure remaining packet buffers */
- case PBA_STRATEGY_EQUAL:
- rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT;
- for (; i < num_pb; i++)
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
- break;
- default:
- break;
- }
-
- /* Only support an equally distributed Tx packet buffer strategy. */
- txpktsize = IXGBE_TXPBSIZE_MAX / num_pb;
- txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX;
- for (i = 0; i < num_pb; i++) {
- IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
- IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
- }
-
- /* Clear unused TCs, if any, to zero buffer size */
- for (; i < IXGBE_MAX_PB; i++) {
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
- }
-}
-
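For the weighted strategy above, the first half of the packet buffers receives 5/8 of the space and the remainder is shared equally, which is where the 80 KB / 48 KB split referenced in the DCB header comes from. A standalone sketch of the arithmetic (editor-added; the 512 KB total and 8 packet buffers are assumed example values):

#include <stdio.h>

int main(void)
{
	unsigned int pbsize = 512;	/* assumed Rx packet buffer, in KB */
	unsigned int num_pb = 8;
	unsigned int i = 0;
	unsigned int rxpktsize;

	/* weighted half: 5/8 of the space across the first num_pb/2 buffers */
	rxpktsize = (pbsize * 5) / (num_pb * 4);	/* 80 KB each */
	pbsize -= rxpktsize * (num_pb / 2);		/* 192 KB left */
	for (; i < num_pb / 2; i++)
		printf("RXPBSIZE[%u] = %u KB\n", i, rxpktsize);

	/* remaining buffers share what is left equally */
	rxpktsize = pbsize / (num_pb - i);		/* 48 KB each */
	for (; i < num_pb; i++)
		printf("RXPBSIZE[%u] = %u KB\n", i, rxpktsize);
	return 0;
}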
-/**
- * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo
- * @hw: pointer to the hardware structure
- *
- * The 82599 and x540 MACs can experience issues if TX work is still pending
- * when a reset occurs. This function prevents this by flushing the PCIe
- * buffers on the system.
- **/
-void ixgbe_clear_tx_pending(struct ixgbe_hw *hw)
-{
- u32 gcr_ext, hlreg0;
-
- /*
- * If double reset is not requested then all transactions should
- * already be clear and as such there is no work to do
- */
- if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED))
- return;
-
- /*
- * Set loopback enable to prevent any transmits from being sent
- * should the link come up. This assumes that the RXCTRL.RXEN bit
- * has already been cleared.
- */
- hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
- IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK);
-
- /* initiate cleaning flow for buffers in the PCIe transaction layer */
- gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
- IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT,
- gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR);
-
- /* Flush all writes and allow 20usec for all transactions to clear */
- IXGBE_WRITE_FLUSH(hw);
- udelay(20);
-
- /* restore previous register values */
- IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
- IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
-}
-
-static const u8 ixgbe_emc_temp_data[4] = {
- IXGBE_EMC_INTERNAL_DATA,
- IXGBE_EMC_DIODE1_DATA,
- IXGBE_EMC_DIODE2_DATA,
- IXGBE_EMC_DIODE3_DATA
-};
-static const u8 ixgbe_emc_therm_limit[4] = {
- IXGBE_EMC_INTERNAL_THERM_LIMIT,
- IXGBE_EMC_DIODE1_THERM_LIMIT,
- IXGBE_EMC_DIODE2_THERM_LIMIT,
- IXGBE_EMC_DIODE3_THERM_LIMIT
-};
-
-/**
- * ixgbe_get_thermal_sensor_data_generic - Gathers thermal sensor data
- * @hw: pointer to hardware structure
- *
- * Reads the thermal sensor readings over I2C and stores them in
- * hw->mac.thermal_sensor_data.
- **/
-s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u16 ets_offset;
- u16 ets_cfg;
- u16 ets_sensor;
- u8 num_sensors;
- u8 sensor_index;
- u8 sensor_location;
- u8 i;
- struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
-
- /* Only support thermal sensors attached to 82599 physical port 0 */
- if ((hw->mac.type != ixgbe_mac_82599EB) ||
- (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) {
- status = IXGBE_NOT_IMPLEMENTED;
- goto out;
- }
-
- status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
- if (status)
- goto out;
-
- if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) {
- status = IXGBE_NOT_IMPLEMENTED;
- goto out;
- }
-
- status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
- if (status)
- goto out;
-
- if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
- != IXGBE_ETS_TYPE_EMC) {
- status = IXGBE_NOT_IMPLEMENTED;
- goto out;
- }
-
- num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
- if (num_sensors > IXGBE_MAX_SENSORS)
- num_sensors = IXGBE_MAX_SENSORS;
-
- for (i = 0; i < num_sensors; i++) {
- status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i),
- &ets_sensor);
- if (status)
- goto out;
-
- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
- IXGBE_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
- IXGBE_ETS_DATA_LOC_SHIFT);
-
- if (sensor_location != 0) {
- status = hw->phy.ops.read_i2c_byte(hw,
- ixgbe_emc_temp_data[sensor_index],
- IXGBE_I2C_THERMAL_SENSOR_ADDR,
- &data->sensor[i].temp);
- if (status)
- goto out;
- }
- }
-out:
- return status;
-}
-
-/**
- * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds
- * @hw: pointer to hardware structure
- *
- * Inits the thermal sensor thresholds according to the NVM map
- * and save off the threshold and location values into mac.thermal_sensor_data
- **/
-s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u16 ets_offset;
- u16 ets_cfg;
- u16 ets_sensor;
- u8 low_thresh_delta;
- u8 num_sensors;
- u8 sensor_index;
- u8 sensor_location;
- u8 therm_limit;
- u8 i;
- struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
-
- memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data));
-
- /* Only support thermal sensors attached to 82599 physical port 0 */
- if ((hw->mac.type != ixgbe_mac_82599EB) ||
- (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1))
- return IXGBE_NOT_IMPLEMENTED;
-
- hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
- if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
- return IXGBE_NOT_IMPLEMENTED;
-
- hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
- if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
- != IXGBE_ETS_TYPE_EMC)
- return IXGBE_NOT_IMPLEMENTED;
-
- low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
- IXGBE_ETS_LTHRES_DELTA_SHIFT);
- num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
-
- for (i = 0; i < num_sensors; i++) {
- hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor);
- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
- IXGBE_ETS_DATA_INDEX_SHIFT);
- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
- IXGBE_ETS_DATA_LOC_SHIFT);
- therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;
-
- hw->phy.ops.write_i2c_byte(hw,
- ixgbe_emc_therm_limit[sensor_index],
- IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit);
-
- if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) {
- data->sensor[i].location = sensor_location;
- data->sensor[i].caution_thresh = therm_limit;
- data->sensor[i].max_op_thresh = therm_limit -
- low_thresh_delta;
- }
- }
- return status;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
deleted file mode 100644
index 2989a80b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_COMMON_H_
-#define _IXGBE_COMMON_H_
-
-#include "ixgbe_type.h"
-
-u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw);
-
-s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw);
-s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
-s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
-s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
- u32 pba_num_size);
-s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
-s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
-void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
-s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
-
-s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
-
-s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
-s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
-s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 *data);
-s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
- u16 words, u16 *data);
-u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
-s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
- u16 *checksum_val);
-s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
-s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
-
-s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
- u32 enable_addr);
-s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
-s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
- u32 mc_addr_count,
- ixgbe_mc_addr_itr func, bool clear);
-s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
- u32 addr_count, ixgbe_mc_addr_itr func);
-s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
-s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
-s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
-s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw);
-s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw);
-
-s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
-void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
-
-s32 ixgbe_validate_mac_addr(u8 *mac_addr);
-s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
-void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
-s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw);
-
-s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
-
-s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
-s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
-
-s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
-s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
-s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
-s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
-s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
- u32 vind, bool vlan_on);
-s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
- bool vlan_on, bool *vfta_changed);
-s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
-s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan);
-
-s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *link_up, bool link_up_wait_to_complete);
-
-s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
- u16 *wwpn_prefix);
-
-s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs);
-void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf);
-void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
-s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
-void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
- int strategy);
-s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
- u8 build, u8 ver);
-void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
-
-#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8
-#define IXGBE_EMC_INTERNAL_DATA 0x00
-#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20
-#define IXGBE_EMC_DIODE1_DATA 0x01
-#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19
-#define IXGBE_EMC_DIODE2_DATA 0x23
-#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A
-#define IXGBE_EMC_DIODE3_DATA 0x2A
-#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30
-
-s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
-s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
-#endif /* _IXGBE_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
deleted file mode 100644
index e9a099d5..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_DCB_H_
-#define _IXGBE_DCB_H_
-
-
-#include "ixgbe_type.h"
-
-/* DCB defines */
-/* DCB credit calculation defines */
-#define IXGBE_DCB_CREDIT_QUANTUM 64
-#define IXGBE_DCB_MAX_CREDIT_REFILL 200 /* 200 * 64B = 12800B */
-#define IXGBE_DCB_MAX_TSO_SIZE (32 * 1024) /* Max TSO pkt size in DCB*/
-#define IXGBE_DCB_MAX_CREDIT (2 * IXGBE_DCB_MAX_CREDIT_REFILL)
-
-/* 513 for 32KB TSO packet */
-#define IXGBE_DCB_MIN_TSO_CREDIT \
- ((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1)
-
-/* DCB configuration defines */
-#define IXGBE_DCB_MAX_USER_PRIORITY 8
-#define IXGBE_DCB_MAX_BW_GROUP 8
-#define IXGBE_DCB_BW_PERCENT 100
-
-#define IXGBE_DCB_TX_CONFIG 0
-#define IXGBE_DCB_RX_CONFIG 1
-
-/* DCB capability defines */
-#define IXGBE_DCB_PG_SUPPORT 0x00000001
-#define IXGBE_DCB_PFC_SUPPORT 0x00000002
-#define IXGBE_DCB_BCN_SUPPORT 0x00000004
-#define IXGBE_DCB_UP2TC_SUPPORT 0x00000008
-#define IXGBE_DCB_GSP_SUPPORT 0x00000010
-
-struct ixgbe_dcb_support {
- u32 capabilities; /* DCB capabilities */
-
- /* Each bit represents a number of TCs configurable in the hw.
- * If 8 traffic classes can be configured, the value is 0x80. */
- u8 traffic_classes;
- u8 pfc_traffic_classes;
-};
-
-enum ixgbe_dcb_tsa {
- ixgbe_dcb_tsa_ets = 0,
- ixgbe_dcb_tsa_group_strict_cee,
- ixgbe_dcb_tsa_strict
-};
-
-/* Traffic class bandwidth allocation per direction */
-struct ixgbe_dcb_tc_path {
- u8 bwg_id; /* Bandwidth Group (BWG) ID */
- u8 bwg_percent; /* % of BWG's bandwidth */
- u8 link_percent; /* % of link bandwidth */
- u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */
- u16 data_credits_refill; /* Credit refill amount in 64B granularity */
- u16 data_credits_max; /* Max credits for a configured packet buffer
- * in 64B granularity.*/
- enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */
-};
-
-enum ixgbe_dcb_pfc {
- ixgbe_dcb_pfc_disabled = 0,
- ixgbe_dcb_pfc_enabled,
- ixgbe_dcb_pfc_enabled_txonly,
- ixgbe_dcb_pfc_enabled_rxonly
-};
-
-/* Traffic class configuration */
-struct ixgbe_dcb_tc_config {
- struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */
- enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */
-
- u16 desc_credits_max; /* For Tx Descriptor arbitration */
- u8 tc; /* Traffic class (TC) */
-};
-
-enum ixgbe_dcb_pba {
- /* PBA[0-7] each use 64KB FIFO */
- ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL,
- /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
- ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED
-};
-
-struct ixgbe_dcb_num_tcs {
- u8 pg_tcs;
- u8 pfc_tcs;
-};
-
-struct ixgbe_dcb_config {
- struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS];
- struct ixgbe_dcb_support support;
- struct ixgbe_dcb_num_tcs num_tcs;
- u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */
- bool pfc_mode_enable;
- bool round_robin_enable;
-
- enum ixgbe_dcb_pba rx_pba_cfg;
-
- u32 dcb_cfg_version; /* Not used...OS-specific? */
- u32 link_speed; /* For bandwidth allocation validation purpose */
- bool vt_mode;
-};
-
-/* DCB driver APIs */
-
-/* DCB rule checking */
-s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *);
-
-/* DCB credits calculation */
-s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int);
-s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *,
- struct ixgbe_dcb_config *, u32, u8);
-
-/* DCB PFC */
-s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *);
-s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
-
-/* DCB stats */
-s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
-s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
-s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
-
-/* DCB config arbiters */
-s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *,
- struct ixgbe_dcb_config *);
-s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *,
- struct ixgbe_dcb_config *);
-s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *,
- struct ixgbe_dcb_config *);
-
-/* DCB unpack routines */
-void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *);
-void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *);
-void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *);
-void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *);
-void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *);
-void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *);
-
-/* DCB initialization */
-s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *);
-s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
-#endif /* _IXGBE_DCB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
deleted file mode 100644
index 44cdc9f2..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
+++ /dev/null
@@ -1,2886 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/* ethtool support for ixgbe */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#ifdef SIOCETHTOOL
-#include <asm/uaccess.h>
-
-#include "ixgbe.h"
-
-#ifndef ETH_GSTRING_LEN
-#define ETH_GSTRING_LEN 32
-#endif
-
-#define IXGBE_ALL_RAR_ENTRIES 16
-
-#ifdef ETHTOOL_OPS_COMPAT
-#include "kcompat_ethtool.c"
-#endif
-#ifdef ETHTOOL_GSTATS
-struct ixgbe_stats {
- char stat_string[ETH_GSTRING_LEN];
- int sizeof_stat;
- int stat_offset;
-};
-
-#define IXGBE_NETDEV_STAT(_net_stat) { \
- .stat_string = #_net_stat, \
- .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
- .stat_offset = offsetof(struct net_device_stats, _net_stat) \
-}
-static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = {
- IXGBE_NETDEV_STAT(rx_packets),
- IXGBE_NETDEV_STAT(tx_packets),
- IXGBE_NETDEV_STAT(rx_bytes),
- IXGBE_NETDEV_STAT(tx_bytes),
- IXGBE_NETDEV_STAT(rx_errors),
- IXGBE_NETDEV_STAT(tx_errors),
- IXGBE_NETDEV_STAT(rx_dropped),
- IXGBE_NETDEV_STAT(tx_dropped),
- IXGBE_NETDEV_STAT(multicast),
- IXGBE_NETDEV_STAT(collisions),
- IXGBE_NETDEV_STAT(rx_over_errors),
- IXGBE_NETDEV_STAT(rx_crc_errors),
- IXGBE_NETDEV_STAT(rx_frame_errors),
- IXGBE_NETDEV_STAT(rx_fifo_errors),
- IXGBE_NETDEV_STAT(rx_missed_errors),
- IXGBE_NETDEV_STAT(tx_aborted_errors),
- IXGBE_NETDEV_STAT(tx_carrier_errors),
- IXGBE_NETDEV_STAT(tx_fifo_errors),
- IXGBE_NETDEV_STAT(tx_heartbeat_errors),
-};
-
-#define IXGBE_STAT(_name, _stat) { \
- .stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \
- .stat_offset = offsetof(struct ixgbe_adapter, _stat) \
-}
-static struct ixgbe_stats ixgbe_gstrings_stats[] = {
- IXGBE_STAT("rx_pkts_nic", stats.gprc),
- IXGBE_STAT("tx_pkts_nic", stats.gptc),
- IXGBE_STAT("rx_bytes_nic", stats.gorc),
- IXGBE_STAT("tx_bytes_nic", stats.gotc),
- IXGBE_STAT("lsc_int", lsc_int),
- IXGBE_STAT("tx_busy", tx_busy),
- IXGBE_STAT("non_eop_descs", non_eop_descs),
-#ifndef CONFIG_IXGBE_NAPI
- IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog),
-#endif
- IXGBE_STAT("broadcast", stats.bprc),
- IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) ,
- IXGBE_STAT("tx_timeout_count", tx_timeout_count),
- IXGBE_STAT("tx_restart_queue", restart_queue),
- IXGBE_STAT("rx_long_length_errors", stats.roc),
- IXGBE_STAT("rx_short_length_errors", stats.ruc),
- IXGBE_STAT("tx_flow_control_xon", stats.lxontxc),
- IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc),
- IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc),
- IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc),
- IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error),
- IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
- IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
-#ifndef IXGBE_NO_LRO
- IXGBE_STAT("lro_aggregated", lro_stats.coal),
- IXGBE_STAT("lro_flushed", lro_stats.flushed),
-#endif /* IXGBE_NO_LRO */
- IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources),
- IXGBE_STAT("hw_rsc_aggregated", rsc_total_count),
- IXGBE_STAT("hw_rsc_flushed", rsc_total_flush),
-#ifdef HAVE_TX_MQ
- IXGBE_STAT("fdir_match", stats.fdirmatch),
- IXGBE_STAT("fdir_miss", stats.fdirmiss),
- IXGBE_STAT("fdir_overflow", fdir_overflow),
-#endif /* HAVE_TX_MQ */
-#ifdef IXGBE_FCOE
- IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc),
- IXGBE_STAT("fcoe_last_errors", stats.fclast),
- IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc),
- IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc),
- IXGBE_STAT("rx_fcoe_dwords", stats.fcoedwrc),
- IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp),
- IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff),
- IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc),
- IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc),
-#endif /* IXGBE_FCOE */
- IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
- IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
- IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc),
- IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc),
-};
-
-#define IXGBE_QUEUE_STATS_LEN \
- ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \
- ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \
- (sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
-#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
-#define IXGBE_NETDEV_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_net_stats)
-#define IXGBE_PB_STATS_LEN ( \
- (((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \
- IXGBE_FLAG_DCB_ENABLED) ? \
- (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
- sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
- sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
- sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
- / sizeof(u64) : 0)
-#define IXGBE_VF_STATS_LEN \
- ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \
- (sizeof(struct vf_stats) / sizeof(u64)))
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
- IXGBE_NETDEV_STATS_LEN + \
- IXGBE_PB_STATS_LEN + \
- IXGBE_QUEUE_STATS_LEN + \
- IXGBE_VF_STATS_LEN)
-
-#endif /* ETHTOOL_GSTATS */
-#ifdef ETHTOOL_TEST
-static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
- "Register test (offline)", "Eeprom test (offline)",
- "Interrupt test (offline)", "Loopback test (offline)",
- "Link test (on/offline)"
-};
-#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
-#endif /* ETHTOOL_TEST */
-
-int ixgbe_get_settings(struct net_device *netdev,
- struct ethtool_cmd *ecmd)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 link_speed = 0;
- bool link_up;
-
- ecmd->supported = SUPPORTED_10000baseT_Full;
- ecmd->autoneg = AUTONEG_ENABLE;
- ecmd->transceiver = XCVR_EXTERNAL;
- if ((hw->phy.media_type == ixgbe_media_type_copper) ||
- (hw->phy.multispeed_fiber)) {
- ecmd->supported |= (SUPPORTED_1000baseT_Full |
- SUPPORTED_Autoneg);
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- ecmd->supported |= SUPPORTED_100baseT_Full;
- break;
- default:
- break;
- }
-
- ecmd->advertising = ADVERTISED_Autoneg;
- if (hw->phy.autoneg_advertised) {
- if (hw->phy.autoneg_advertised &
- IXGBE_LINK_SPEED_100_FULL)
- ecmd->advertising |= ADVERTISED_100baseT_Full;
- if (hw->phy.autoneg_advertised &
- IXGBE_LINK_SPEED_10GB_FULL)
- ecmd->advertising |= ADVERTISED_10000baseT_Full;
- if (hw->phy.autoneg_advertised &
- IXGBE_LINK_SPEED_1GB_FULL)
- ecmd->advertising |= ADVERTISED_1000baseT_Full;
- } else {
- /*
- * Default advertised modes in case
- * phy.autoneg_advertised isn't set.
- */
- ecmd->advertising |= (ADVERTISED_10000baseT_Full |
- ADVERTISED_1000baseT_Full);
- if (hw->mac.type == ixgbe_mac_X540)
- ecmd->advertising |= ADVERTISED_100baseT_Full;
- }
-
- if (hw->phy.media_type == ixgbe_media_type_copper) {
- ecmd->supported |= SUPPORTED_TP;
- ecmd->advertising |= ADVERTISED_TP;
- ecmd->port = PORT_TP;
- } else {
- ecmd->supported |= SUPPORTED_FIBRE;
- ecmd->advertising |= ADVERTISED_FIBRE;
- ecmd->port = PORT_FIBRE;
- }
- } else if (hw->phy.media_type == ixgbe_media_type_backplane) {
- /* Set as FIBRE until SERDES defined in kernel */
- if (hw->device_id == IXGBE_DEV_ID_82598_BX) {
- ecmd->supported = (SUPPORTED_1000baseT_Full |
- SUPPORTED_FIBRE);
- ecmd->advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_FIBRE);
- ecmd->port = PORT_FIBRE;
- ecmd->autoneg = AUTONEG_DISABLE;
- } else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
- || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) {
- ecmd->supported |= (SUPPORTED_1000baseT_Full |
- SUPPORTED_Autoneg |
- SUPPORTED_FIBRE);
- ecmd->advertising = (ADVERTISED_10000baseT_Full |
- ADVERTISED_1000baseT_Full |
- ADVERTISED_Autoneg |
- ADVERTISED_FIBRE);
- ecmd->port = PORT_FIBRE;
- } else {
- ecmd->supported |= (SUPPORTED_1000baseT_Full |
- SUPPORTED_FIBRE);
- ecmd->advertising = (ADVERTISED_10000baseT_Full |
- ADVERTISED_1000baseT_Full |
- ADVERTISED_FIBRE);
- ecmd->port = PORT_FIBRE;
- }
- } else {
- ecmd->supported |= SUPPORTED_FIBRE;
- ecmd->advertising = (ADVERTISED_10000baseT_Full |
- ADVERTISED_FIBRE);
- ecmd->port = PORT_FIBRE;
- ecmd->autoneg = AUTONEG_DISABLE;
- }
-
-#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT
- /* Get PHY type */
- switch (adapter->hw.phy.type) {
- case ixgbe_phy_tn:
- case ixgbe_phy_aq:
- case ixgbe_phy_cu_unknown:
- /* Copper 10G-BASET */
- ecmd->port = PORT_TP;
- break;
- case ixgbe_phy_qt:
- ecmd->port = PORT_FIBRE;
- break;
- case ixgbe_phy_nl:
- case ixgbe_phy_sfp_passive_tyco:
- case ixgbe_phy_sfp_passive_unknown:
- case ixgbe_phy_sfp_ftl:
- case ixgbe_phy_sfp_avago:
- case ixgbe_phy_sfp_intel:
- case ixgbe_phy_sfp_unknown:
- switch (adapter->hw.phy.sfp_type) {
- /* SFP+ devices, further checking needed */
- case ixgbe_sfp_type_da_cu:
- case ixgbe_sfp_type_da_cu_core0:
- case ixgbe_sfp_type_da_cu_core1:
- ecmd->port = PORT_DA;
- break;
- case ixgbe_sfp_type_sr:
- case ixgbe_sfp_type_lr:
- case ixgbe_sfp_type_srlr_core0:
- case ixgbe_sfp_type_srlr_core1:
- ecmd->port = PORT_FIBRE;
- break;
- case ixgbe_sfp_type_not_present:
- ecmd->port = PORT_NONE;
- break;
- case ixgbe_sfp_type_1g_cu_core0:
- case ixgbe_sfp_type_1g_cu_core1:
- ecmd->port = PORT_TP;
- ecmd->supported = SUPPORTED_TP;
- ecmd->advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_TP);
- break;
- case ixgbe_sfp_type_1g_sx_core0:
- case ixgbe_sfp_type_1g_sx_core1:
- ecmd->port = PORT_FIBRE;
- ecmd->supported = SUPPORTED_FIBRE;
- ecmd->advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_FIBRE);
- break;
- case ixgbe_sfp_type_unknown:
- default:
- ecmd->port = PORT_OTHER;
- break;
- }
- break;
- case ixgbe_phy_xaui:
- ecmd->port = PORT_NONE;
- break;
- case ixgbe_phy_unknown:
- case ixgbe_phy_generic:
- case ixgbe_phy_sfp_unsupported:
- default:
- ecmd->port = PORT_OTHER;
- break;
- }
-#endif
-
- if (!in_interrupt()) {
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
- } else {
- /*
- * this case is a special workaround for RHEL5 bonding
- * that calls this routine from interrupt context
- */
- link_speed = adapter->link_speed;
- link_up = adapter->link_up;
- }
-
- if (link_up) {
- switch (link_speed) {
- case IXGBE_LINK_SPEED_10GB_FULL:
- ecmd->speed = SPEED_10000;
- break;
- case IXGBE_LINK_SPEED_1GB_FULL:
- ecmd->speed = SPEED_1000;
- break;
- case IXGBE_LINK_SPEED_100_FULL:
- ecmd->speed = SPEED_100;
- break;
- default:
- break;
- }
- ecmd->duplex = DUPLEX_FULL;
- } else {
- ecmd->speed = -1;
- ecmd->duplex = -1;
- }
-
- return 0;
-}
-
-static int ixgbe_set_settings(struct net_device *netdev,
- struct ethtool_cmd *ecmd)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 advertised, old;
- s32 err = 0;
-
- if ((hw->phy.media_type == ixgbe_media_type_copper) ||
- (hw->phy.multispeed_fiber)) {
- /*
- * this function does not support duplex forcing, but can
- * limit the advertising of the adapter to the specified speed
- */
- if (ecmd->autoneg == AUTONEG_DISABLE)
- return -EINVAL;
-
- if (ecmd->advertising & ~ecmd->supported)
- return -EINVAL;
-
- old = hw->phy.autoneg_advertised;
- advertised = 0;
- if (ecmd->advertising & ADVERTISED_10000baseT_Full)
- advertised |= IXGBE_LINK_SPEED_10GB_FULL;
-
- if (ecmd->advertising & ADVERTISED_1000baseT_Full)
- advertised |= IXGBE_LINK_SPEED_1GB_FULL;
-
- if (ecmd->advertising & ADVERTISED_100baseT_Full)
- advertised |= IXGBE_LINK_SPEED_100_FULL;
-
- if (old == advertised)
- return err;
- /* this sets the link speed and restarts auto-neg */
- hw->mac.autotry_restart = true;
- err = hw->mac.ops.setup_link(hw, advertised, true, true);
- if (err) {
- e_info(probe, "setup link failed with code %d\n", err);
- hw->mac.ops.setup_link(hw, old, true, true);
- }
- }
- return err;
-}
-
-static void ixgbe_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pause)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
-
- if (hw->fc.disable_fc_autoneg)
- pause->autoneg = 0;
- else
- pause->autoneg = 1;
-
- if (hw->fc.current_mode == ixgbe_fc_rx_pause) {
- pause->rx_pause = 1;
- } else if (hw->fc.current_mode == ixgbe_fc_tx_pause) {
- pause->tx_pause = 1;
- } else if (hw->fc.current_mode == ixgbe_fc_full) {
- pause->rx_pause = 1;
- pause->tx_pause = 1;
- }
-}
-
-static int ixgbe_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pause)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- struct ixgbe_fc_info fc = hw->fc;
-
- /* 82598 does not support link flow control with DCB enabled */
- if ((hw->mac.type == ixgbe_mac_82598EB) &&
- (adapter->flags & IXGBE_FLAG_DCB_ENABLED))
- return -EINVAL;
-
- fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE);
-
- if ((pause->rx_pause && pause->tx_pause) || pause->autoneg)
- fc.requested_mode = ixgbe_fc_full;
- else if (pause->rx_pause)
- fc.requested_mode = ixgbe_fc_rx_pause;
- else if (pause->tx_pause)
- fc.requested_mode = ixgbe_fc_tx_pause;
- else
- fc.requested_mode = ixgbe_fc_none;
-
- /* if the flow control settings changed, store them and reinit/reset so they take effect */
- if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) {
- hw->fc = fc;
- if (netif_running(netdev))
- ixgbe_reinit_locked(adapter);
- else
- ixgbe_reset(adapter);
- }
-
- return 0;
-}
-
-static u32 ixgbe_get_msglevel(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- return adapter->msg_enable;
-}
-
-static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- adapter->msg_enable = data;
-}
-
-static int ixgbe_get_regs_len(struct net_device *netdev)
-{
-#define IXGBE_REGS_LEN 1129
- return IXGBE_REGS_LEN * sizeof(u32);
-}
-
-#define IXGBE_GET_STAT(_A_, _R_) (_A_->stats._R_)
-
-
-static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
- void *p)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 *regs_buff = p;
- u8 i;
-
- printk(KERN_DEBUG "ixgbe_get_regs_1\n");
- memset(p, 0, IXGBE_REGS_LEN * sizeof(u32));
- printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr);
-
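- /* version format: 1 in the top byte, then the PCI revision ID and device ID */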
- regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id;
-
- /* General Registers */
- regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL);
- printk(KERN_DEBUG "ixgbe_get_regs_3\n");
- regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS);
- regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
- regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP);
- regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP);
- regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
- regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER);
- regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER);
-
- printk(KERN_DEBUG "ixgbe_get_regs_4\n");
-
- /* NVM Register */
- regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC);
- regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD);
- regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA);
- regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL);
- regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA);
- regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL);
- regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA);
- regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT);
- regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP);
- regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC);
-
- /* Interrupt */
- /* don't read EICR because doing so can clear interrupt causes; instead
- * read EICS, which shadows EICR without clearing it */
- regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS);
- regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS);
- regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS);
- regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC);
- regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC);
- regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM);
- regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0));
- regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0));
- regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT);
- regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA);
- regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0));
- regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE);
-
- /* Flow Control */
- regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP);
- regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0));
- regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1));
- regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2));
- regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3));
- for (i = 0; i < 8; i++) {
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i));
- regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i));
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- regs_buff[35 + i] = IXGBE_READ_REG(hw,
- IXGBE_FCRTL_82599(i));
- regs_buff[43 + i] = IXGBE_READ_REG(hw,
- IXGBE_FCRTH_82599(i));
- break;
- default:
- break;
- }
- }
- regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV);
- regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS);
-
- /* Receive DMA */
- for (i = 0; i < 64; i++)
- regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
- for (i = 0; i < 64; i++)
- regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
- for (i = 0; i < 64; i++)
- regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
- for (i = 0; i < 64; i++)
- regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
- for (i = 0; i < 64; i++)
- regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
- for (i = 0; i < 64; i++)
- regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
- for (i = 0; i < 16; i++)
- regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
- for (i = 0; i < 16; i++)
- regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
- regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
- for (i = 0; i < 8; i++)
- regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
- regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
- regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN);
-
- /* Receive */
- regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
- regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL);
- for (i = 0; i < 16; i++)
- regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i));
- for (i = 0; i < 16; i++)
- regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i));
- regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0));
- regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
- regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL);
- regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC);
- regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
- for (i = 0; i < 8; i++)
- regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i));
- for (i = 0; i < 8; i++)
- regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i));
- regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP);
-
- /* Transmit */
- for (i = 0; i < 32; i++)
- regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
- for (i = 0; i < 32; i++)
- regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
- for (i = 0; i < 32; i++)
- regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
- for (i = 0; i < 32; i++)
- regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
- for (i = 0; i < 32; i++)
- regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
- for (i = 0; i < 32; i++)
- regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
- for (i = 0; i < 32; i++)
- regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i));
- for (i = 0; i < 32; i++)
- regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i));
- regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
- for (i = 0; i < 16; i++)
- regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
- regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG);
- for (i = 0; i < 8; i++)
- regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i));
- regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP);
-
- /* Wake Up */
- regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC);
- regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC);
- regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS);
- regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV);
- regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT);
- regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT);
- regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL);
- regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM);
- regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0));
-
- /* DCB */
- regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS);
- regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS);
- regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
- regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR);
- for (i = 0; i < 8; i++)
- regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i));
- for (i = 0; i < 8; i++)
- regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i));
- for (i = 0; i < 8; i++)
- regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i));
- for (i = 0; i < 8; i++)
- regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i));
- for (i = 0; i < 8; i++)
- regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i));
- for (i = 0; i < 8; i++)
- regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i));
-
- /* Statistics */
- regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs);
- regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc);
- regs_buff[883] = IXGBE_GET_STAT(adapter, errbc);
- regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc);
- for (i = 0; i < 8; i++)
- regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]);
- regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc);
- regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc);
- regs_buff[895] = IXGBE_GET_STAT(adapter, rlec);
- regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc);
- regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc);
- regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc);
- regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc);
- for (i = 0; i < 8; i++)
- regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]);
- for (i = 0; i < 8; i++)
- regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]);
- for (i = 0; i < 8; i++)
- regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]);
- for (i = 0; i < 8; i++)
- regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]);
- regs_buff[932] = IXGBE_GET_STAT(adapter, prc64);
- regs_buff[933] = IXGBE_GET_STAT(adapter, prc127);
- regs_buff[934] = IXGBE_GET_STAT(adapter, prc255);
- regs_buff[935] = IXGBE_GET_STAT(adapter, prc511);
- regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023);
- regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522);
- regs_buff[938] = IXGBE_GET_STAT(adapter, gprc);
- regs_buff[939] = IXGBE_GET_STAT(adapter, bprc);
- regs_buff[940] = IXGBE_GET_STAT(adapter, mprc);
- regs_buff[941] = IXGBE_GET_STAT(adapter, gptc);
- regs_buff[942] = IXGBE_GET_STAT(adapter, gorc);
- regs_buff[944] = IXGBE_GET_STAT(adapter, gotc);
- for (i = 0; i < 8; i++)
- regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]);
- regs_buff[954] = IXGBE_GET_STAT(adapter, ruc);
- regs_buff[955] = IXGBE_GET_STAT(adapter, rfc);
- regs_buff[956] = IXGBE_GET_STAT(adapter, roc);
- regs_buff[957] = IXGBE_GET_STAT(adapter, rjc);
- regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc);
- regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc);
- regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc);
- regs_buff[961] = IXGBE_GET_STAT(adapter, tor);
- regs_buff[963] = IXGBE_GET_STAT(adapter, tpr);
- regs_buff[964] = IXGBE_GET_STAT(adapter, tpt);
- regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64);
- regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127);
- regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255);
- regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511);
- regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023);
- regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522);
- regs_buff[971] = IXGBE_GET_STAT(adapter, mptc);
- regs_buff[972] = IXGBE_GET_STAT(adapter, bptc);
- regs_buff[973] = IXGBE_GET_STAT(adapter, xec);
- for (i = 0; i < 16; i++)
- regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]);
- for (i = 0; i < 16; i++)
- regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]);
- for (i = 0; i < 16; i++)
- regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]);
- for (i = 0; i < 16; i++)
- regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]);
-
- /* MAC */
- regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG);
- regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
- regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
- regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0);
- regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1);
- regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
- regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
- regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP);
- regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP);
- regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0);
- regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1);
- regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP);
- regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA);
- regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE);
- regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD);
- regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS);
- regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA);
- regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD);
- regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD);
- regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD);
- regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG);
- regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1);
- regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2);
- regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS);
- regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC);
- regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS);
- regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS);
- regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
- regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3);
- regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1);
- regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2);
- regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
-
- /* Diagnostic */
- regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL);
- for (i = 0; i < 8; i++)
- regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i));
- regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN);
- for (i = 0; i < 4; i++)
- regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i));
- regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE);
- regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL);
- for (i = 0; i < 8; i++)
- regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i));
- regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN);
- for (i = 0; i < 4; i++)
- regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i));
- regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE);
- regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL);
- regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0);
- regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1);
- regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2);
- regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3);
- regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL);
- regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0);
- regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1);
- regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2);
- regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3);
- for (i = 0; i < 8; i++)
- regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i));
- regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL);
- regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1);
- regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2);
- regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1);
- regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2);
- regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS);
- regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL);
- regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC);
- regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC);
-
- /* 82599 X540 specific registers */
- regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN);
-}
-
-static int ixgbe_get_eeprom_len(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- return adapter->hw.eeprom.word_size * 2;
-}
-
-static int ixgbe_get_eeprom(struct net_device *netdev,
- struct ethtool_eeprom *eeprom, u8 *bytes)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u16 *eeprom_buff;
- int first_word, last_word, eeprom_len;
- int ret_val = 0;
- u16 i;
-
- if (eeprom->len == 0)
- return -EINVAL;
-
- eeprom->magic = hw->vendor_id | (hw->device_id << 16);
-
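- /*
- * The EEPROM is word (16-bit) addressable; convert the byte offset and
- * length into an inclusive range of words to read.
- */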
- first_word = eeprom->offset >> 1;
- last_word = (eeprom->offset + eeprom->len - 1) >> 1;
- eeprom_len = last_word - first_word + 1;
-
- eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL);
- if (!eeprom_buff)
- return -ENOMEM;
-
- ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len,
- eeprom_buff);
-
- /* Device's eeprom is always little-endian, word addressable */
- for (i = 0; i < eeprom_len; i++)
- le16_to_cpus(&eeprom_buff[i]);
-
- memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
- kfree(eeprom_buff);
-
- return ret_val;
-}
-
-static int ixgbe_set_eeprom(struct net_device *netdev,
- struct ethtool_eeprom *eeprom, u8 *bytes)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u16 *eeprom_buff;
- void *ptr;
- int max_len, first_word, last_word, ret_val = 0;
- u16 i;
-
- if (eeprom->len == 0)
- return -EINVAL;
-
- if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
- return -EINVAL;
-
- max_len = hw->eeprom.word_size * 2;
-
- first_word = eeprom->offset >> 1;
- last_word = (eeprom->offset + eeprom->len - 1) >> 1;
- eeprom_buff = kmalloc(max_len, GFP_KERNEL);
- if (!eeprom_buff)
- return -ENOMEM;
-
- ptr = eeprom_buff;
-
- if (eeprom->offset & 1) {
- /*
- * need read/modify/write of first changed EEPROM word
- * only the second byte of the word is being modified
- */
- ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]);
- if (ret_val)
- goto err;
-
- ptr++;
- }
- if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
- /*
- * need read/modify/write of last changed EEPROM word
- * only the first byte of the word is being modified
- */
- ret_val = ixgbe_read_eeprom(hw, last_word,
- &eeprom_buff[last_word - first_word]);
- if (ret_val)
- goto err;
- }
-
- /* Device's eeprom is always little-endian, word addressable */
- for (i = 0; i < last_word - first_word + 1; i++)
- le16_to_cpus(&eeprom_buff[i]);
-
- memcpy(ptr, bytes, eeprom->len);
-
- for (i = 0; i < last_word - first_word + 1; i++)
- cpu_to_le16s(&eeprom_buff[i]);
-
- ret_val = ixgbe_write_eeprom_buffer(hw, first_word,
- last_word - first_word + 1,
- eeprom_buff);
-
- /* Update the checksum */
- if (ret_val == 0)
- ixgbe_update_eeprom_checksum(hw);
-
-err:
- kfree(eeprom_buff);
- return ret_val;
-}
-
-static void ixgbe_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
-
- strlcpy(drvinfo->version, ixgbe_driver_version,
- sizeof(drvinfo->version));
-
- strlcpy(drvinfo->fw_version, adapter->eeprom_id,
- sizeof(drvinfo->fw_version));
-
- strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
- sizeof(drvinfo->bus_info));
-
- drvinfo->n_stats = IXGBE_STATS_LEN;
- drvinfo->testinfo_len = IXGBE_TEST_LEN;
- drvinfo->regdump_len = ixgbe_get_regs_len(netdev);
-}
-
-static void ixgbe_get_ringparam(struct net_device *netdev,
- struct ethtool_ringparam *ring)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- ring->rx_max_pending = IXGBE_MAX_RXD;
- ring->tx_max_pending = IXGBE_MAX_TXD;
- ring->rx_mini_max_pending = 0;
- ring->rx_jumbo_max_pending = 0;
- ring->rx_pending = adapter->rx_ring_count;
- ring->tx_pending = adapter->tx_ring_count;
- ring->rx_mini_pending = 0;
- ring->rx_jumbo_pending = 0;
-}
-
-static int ixgbe_set_ringparam(struct net_device *netdev,
- struct ethtool_ringparam *ring)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL;
- u32 new_rx_count, new_tx_count;
- int i, err = 0;
-
- if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
- return -EINVAL;
-
- new_tx_count = clamp_t(u32, ring->tx_pending,
- IXGBE_MIN_TXD, IXGBE_MAX_TXD);
- new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
-
- new_rx_count = clamp_t(u32, ring->rx_pending,
- IXGBE_MIN_RXD, IXGBE_MAX_RXD);
- new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
-
- /* if nothing to do return success */
- if ((new_tx_count == adapter->tx_ring_count) &&
- (new_rx_count == adapter->rx_ring_count))
- return 0;
-
- while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
-
- if (!netif_running(adapter->netdev)) {
- for (i = 0; i < adapter->num_tx_queues; i++)
- adapter->tx_ring[i]->count = new_tx_count;
- for (i = 0; i < adapter->num_rx_queues; i++)
- adapter->rx_ring[i]->count = new_rx_count;
- adapter->tx_ring_count = new_tx_count;
- adapter->rx_ring_count = new_rx_count;
- goto clear_reset;
- }
-
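- /*
- * The interface is running: allocate replacement rings with the new
- * counts first, then take the interface down, swap the rings in and
- * bring it back up, so a failed allocation leaves the old rings intact.
- */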
- /* alloc updated Tx resources */
- if (new_tx_count != adapter->tx_ring_count) {
- tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
- if (!tx_ring) {
- err = -ENOMEM;
- goto clear_reset;
- }
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- /* clone ring and setup updated count */
- tx_ring[i] = *adapter->tx_ring[i];
- tx_ring[i].count = new_tx_count;
- err = ixgbe_setup_tx_resources(&tx_ring[i]);
- if (err) {
- while (i) {
- i--;
- ixgbe_free_tx_resources(&tx_ring[i]);
- }
-
- vfree(tx_ring);
- tx_ring = NULL;
-
- goto clear_reset;
- }
- }
- }
-
- /* alloc updated Rx resources */
- if (new_rx_count != adapter->rx_ring_count) {
- rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring));
- if (!rx_ring) {
- err = -ENOMEM;
- goto clear_reset;
- }
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- /* clone ring and setup updated count */
- rx_ring[i] = *adapter->rx_ring[i];
- rx_ring[i].count = new_rx_count;
- err = ixgbe_setup_rx_resources(&rx_ring[i]);
- if (err) {
- while (i) {
- i--;
- ixgbe_free_rx_resources(&rx_ring[i]);
- }
-
- vfree(rx_ring);
- rx_ring = NULL;
-
- goto clear_reset;
- }
- }
- }
-
- /* bring interface down to prepare for update */
- ixgbe_down(adapter);
-
- /* Tx */
- if (tx_ring) {
- for (i = 0; i < adapter->num_tx_queues; i++) {
- ixgbe_free_tx_resources(adapter->tx_ring[i]);
- *adapter->tx_ring[i] = tx_ring[i];
- }
- adapter->tx_ring_count = new_tx_count;
-
- vfree(tx_ring);
- tx_ring = NULL;
- }
-
- /* Rx */
- if (rx_ring) {
- for (i = 0; i < adapter->num_rx_queues; i++) {
- ixgbe_free_rx_resources(adapter->rx_ring[i]);
- *adapter->rx_ring[i] = rx_ring[i];
- }
- adapter->rx_ring_count = new_rx_count;
-
- vfree(rx_ring);
- rx_ring = NULL;
- }
-
- /* restore interface using new values */
- ixgbe_up(adapter);
-
-clear_reset:
- /* free Tx resources if Rx error is encountered */
- if (tx_ring) {
- for (i = 0; i < adapter->num_tx_queues; i++)
- ixgbe_free_tx_resources(&tx_ring[i]);
- vfree(tx_ring);
- }
-
- clear_bit(__IXGBE_RESETTING, &adapter->state);
- return err;
-}
-
-#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
-static int ixgbe_get_stats_count(struct net_device *netdev)
-{
- return IXGBE_STATS_LEN;
-}
-
-#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
-static int ixgbe_get_sset_count(struct net_device *netdev, int sset)
-{
- switch (sset) {
- case ETH_SS_TEST:
- return IXGBE_TEST_LEN;
- case ETH_SS_STATS:
- return IXGBE_STATS_LEN;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
-static void ixgbe_get_ethtool_stats(struct net_device *netdev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats *net_stats = &netdev->stats;
-#else
- struct net_device_stats *net_stats = &adapter->net_stats;
-#endif
- u64 *queue_stat;
- int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64);
- int i, j, k;
- char *p;
-
- printk(KERN_DEBUG "ixgbe_stats 0\n");
- ixgbe_update_stats(adapter);
- printk(KERN_DEBUG "ixgbe_stats 1\n");
-
- for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
- p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset;
- data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat ==
- sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
- }
- for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) {
- p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset;
- data[i] = (ixgbe_gstrings_stats[j].sizeof_stat ==
- sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
- }
- printk(KERN_DEBUG "ixgbe_stats 2\n");
-#ifdef NO_VNIC
- for (j = 0; j < adapter->num_tx_queues; j++) {
- queue_stat = (u64 *)&adapter->tx_ring[j]->stats;
- for (k = 0; k < stat_count; k++)
- data[i + k] = queue_stat[k];
- i += k;
- }
- for (j = 0; j < adapter->num_rx_queues; j++) {
- queue_stat = (u64 *)&adapter->rx_ring[j]->stats;
- for (k = 0; k < stat_count; k++)
- data[i + k] = queue_stat[k];
- i += k;
- }
- printk(KERN_DEBUG "ixgbe_stats 3\n");
-#endif
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
- for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
- data[i++] = adapter->stats.pxontxc[j];
- data[i++] = adapter->stats.pxofftxc[j];
- }
- for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
- data[i++] = adapter->stats.pxonrxc[j];
- data[i++] = adapter->stats.pxoffrxc[j];
- }
- }
- printk(KERN_DEBUG "ixgbe_stats 4\n");
- stat_count = sizeof(struct vf_stats) / sizeof(u64);
- for (j = 0; j < adapter->num_vfs; j++) {
- queue_stat = (u64 *)&adapter->vfinfo[j].vfstats;
- for (k = 0; k < stat_count; k++)
- data[i + k] = queue_stat[k];
- queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats;
- for (k = 0; k < stat_count; k++)
- data[i + k] += queue_stat[k];
- i += k;
- }
-}
-
-static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
- u8 *data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- char *p = (char *)data;
- int i;
-
- switch (stringset) {
- case ETH_SS_TEST:
- memcpy(data, *ixgbe_gstrings_test,
- IXGBE_TEST_LEN * ETH_GSTRING_LEN);
- break;
- case ETH_SS_STATS:
- for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
- memcpy(p, ixgbe_gstrings_net_stats[i].stat_string,
- ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
- memcpy(p, ixgbe_gstrings_stats[i].stat_string,
- ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < adapter->num_tx_queues; i++) {
- sprintf(p, "tx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "tx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- sprintf(p, "rx_queue_%u_packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_queue_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- }
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
- for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
- sprintf(p, "tx_pb_%u_pxon", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "tx_pb_%u_pxoff", i);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
- sprintf(p, "rx_pb_%u_pxon", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rx_pb_%u_pxoff", i);
- p += ETH_GSTRING_LEN;
- }
- }
- for (i = 0; i < adapter->num_vfs; i++) {
- sprintf(p, "VF %d Rx Packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "VF %d Rx Bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "VF %d Tx Packets", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "VF %d Tx Bytes", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "VF %d MC Packets", i);
- p += ETH_GSTRING_LEN;
- }
- /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
- break;
- }
-}
-
-static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- bool link_up;
- u32 link_speed = 0;
- *data = 0;
-
- hw->mac.ops.check_link(hw, &link_speed, &link_up, true);
- if (link_up)
- return *data;
- else
- *data = 1;
- return *data;
-}
-
-/* ethtool register test data */
-struct ixgbe_reg_test {
- u16 reg;
- u8 array_len;
- u8 test_type;
- u32 mask;
- u32 write;
-};
-
-/* In the hardware, registers are laid out either singly, in arrays
- * spaced 0x40 bytes apart, or in contiguous tables. We assume
- * most tests take place on arrays or single registers (handled
- * as a single-element array) and special-case the tables.
- * Table tests are always pattern tests.
- *
- * We also make provision for some required setup steps by specifying
- * registers to be written without any read-back testing.
- */
-
-#define PATTERN_TEST 1
-#define SET_READ_TEST 2
-#define WRITE_NO_TEST 3
-#define TABLE32_TEST 4
-#define TABLE64_TEST_LO 5
-#define TABLE64_TEST_HI 6
-
-/* default 82599 register test */
-static struct ixgbe_reg_test reg_test_82599[] = {
- { IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
- { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
- { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
- { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
- { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 },
- { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 },
- { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF },
- { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
-
-/* default 82598 register test */
-static struct ixgbe_reg_test reg_test_82598[] = {
- { IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
- { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- /* Enable all four RX queues before testing. */
- { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
- /* RDH is read-only for 82598, only test RDT. */
- { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
- { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
- { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
- { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF },
- { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
- { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
- { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 },
- { IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 },
- { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
- { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF },
- { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
- { 0, 0, 0, 0 }
-};
-
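-/* Write each test pattern masked by the writable-bit mask W, read the
- * register back, compare against (pattern & W & M), and restore the
- * original value whether or not the comparison fails.
- */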
-#define REG_PATTERN_TEST(R, M, W) \
-{ \
- u32 pat, val, before; \
- const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \
- for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { \
- before = readl(adapter->hw.hw_addr + R); \
- writel((_test[pat] & W), (adapter->hw.hw_addr + R)); \
- val = readl(adapter->hw.hw_addr + R); \
- if (val != (_test[pat] & W & M)) { \
- e_err(drv, "pattern test reg %04X failed: got " \
- "0x%08X expected 0x%08X\n", \
- R, val, (_test[pat] & W & M)); \
- *data = R; \
- writel(before, adapter->hw.hw_addr + R); \
- return 1; \
- } \
- writel(before, adapter->hw.hw_addr + R); \
- } \
-}
-
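-/* Write (W & M), read the register back, and fail the test if the bits
- * covered by M do not read back as written; the previous contents are
- * restored in either case.
- */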
-#define REG_SET_AND_CHECK(R, M, W) \
-{ \
- u32 val, before; \
- before = readl(adapter->hw.hw_addr + R); \
- writel((W & M), (adapter->hw.hw_addr + R)); \
- val = readl(adapter->hw.hw_addr + R); \
- if ((W & M) != (val & M)) { \
- e_err(drv, "set/check reg %04X test failed: got 0x%08X " \
- "expected 0x%08X\n", R, (val & M), (W & M)); \
- *data = R; \
- writel(before, (adapter->hw.hw_addr + R)); \
- return 1; \
- } \
- writel(before, (adapter->hw.hw_addr + R)); \
-}
-
-static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
-{
- struct ixgbe_reg_test *test;
- u32 value, status_before, status_after;
- u32 i, toggle;
-
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82598EB:
- toggle = 0x7FFFF3FF;
- test = reg_test_82598;
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- toggle = 0x7FFFF30F;
- test = reg_test_82599;
- break;
- default:
- *data = 1;
- return 1;
- break;
- }
-
- /*
- * Because the status register is such a special case,
- * we handle it separately from the rest of the register
- * tests. Some bits are read-only, some toggle, and some
- * are writeable on newer MACs.
- */
- status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS);
- value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle);
- status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle;
- if (value != status_after) {
- e_err(drv, "failed STATUS register test got: "
- "0x%08X expected: 0x%08X\n", status_after, value);
- *data = 1;
- return 1;
- }
- /* restore previous status */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before);
-
- /*
- * Perform the remainder of the register test, looping through
- * the test table until we either fail or reach the null entry.
- */
- while (test->reg) {
- for (i = 0; i < test->array_len; i++) {
- switch (test->test_type) {
- case PATTERN_TEST:
- REG_PATTERN_TEST(test->reg + (i * 0x40),
- test->mask,
- test->write);
- break;
- case SET_READ_TEST:
- REG_SET_AND_CHECK(test->reg + (i * 0x40),
- test->mask,
- test->write);
- break;
- case WRITE_NO_TEST:
- writel(test->write,
- (adapter->hw.hw_addr + test->reg)
- + (i * 0x40));
- break;
- case TABLE32_TEST:
- REG_PATTERN_TEST(test->reg + (i * 4),
- test->mask,
- test->write);
- break;
- case TABLE64_TEST_LO:
- REG_PATTERN_TEST(test->reg + (i * 8),
- test->mask,
- test->write);
- break;
- case TABLE64_TEST_HI:
- REG_PATTERN_TEST((test->reg + 4) + (i * 8),
- test->mask,
- test->write);
- break;
- }
- }
- test++;
- }
-
- *data = 0;
- return 0;
-}
-
-static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
-{
- if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL))
- *data = 1;
- else
- *data = 0;
- return *data;
-}
-
-static irqreturn_t ixgbe_test_intr(int irq, void *data)
-{
- struct net_device *netdev = data;
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
-
- return IRQ_HANDLED;
-}
-
-static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
-{
- struct net_device *netdev = adapter->netdev;
- u32 mask, i = 0, shared_int = true;
- u32 irq = adapter->pdev->irq;
-
- *data = 0;
-
- /* Hook up test interrupt handler just for this test */
- if (adapter->msix_entries) {
- /* NOTE: we don't test MSI-X interrupts here, yet */
- return 0;
- } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
- shared_int = false;
- if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name,
- netdev)) {
- *data = 1;
- return -1;
- }
- } else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED,
- netdev->name, netdev)) {
- shared_int = false;
- } else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED,
- netdev->name, netdev)) {
- *data = 1;
- return -1;
- }
- e_info(hw, "testing %s interrupt\n",
- (shared_int ? "shared" : "unshared"));
-
- /* Disable all the interrupts */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
- IXGBE_WRITE_FLUSH(&adapter->hw);
- usleep_range(10000, 20000);
-
- /* Test each interrupt */
- for (; i < 10; i++) {
- /* Interrupt to test */
- mask = 1 << i;
-
- if (!shared_int) {
- /*
- * Disable the interrupts to be reported in
- * the cause register and then force the same
- * interrupt and see if one gets posted. If
- * an interrupt was posted to the bus, the
- * test failed.
- */
- adapter->test_icr = 0;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
- ~mask & 0x00007FFF);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
- ~mask & 0x00007FFF);
- IXGBE_WRITE_FLUSH(&adapter->hw);
- usleep_range(10000, 20000);
-
- if (adapter->test_icr & mask) {
- *data = 3;
- break;
- }
- }
-
- /*
- * Enable the interrupt to be reported in the cause
- * register and then force the same interrupt and see
- * if one gets posted. If an interrupt was not posted
- * to the bus, the test failed.
- */
- adapter->test_icr = 0;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
- IXGBE_WRITE_FLUSH(&adapter->hw);
- usleep_range(10000, 20000);
-
- if (!(adapter->test_icr & mask)) {
- *data = 4;
- break;
- }
-
- if (!shared_int) {
- /*
- * Disable the other interrupts to be reported in
- * the cause register and then force the other
- * interrupts and see if any get posted. If
- * an interrupt was posted to the bus, the
- * test failed.
- */
- adapter->test_icr = 0;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
- ~mask & 0x00007FFF);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
- ~mask & 0x00007FFF);
- IXGBE_WRITE_FLUSH(&adapter->hw);
- usleep_range(10000, 20000);
-
- if (adapter->test_icr) {
- *data = 5;
- break;
- }
- }
- }
-
- /* Disable all the interrupts */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
- IXGBE_WRITE_FLUSH(&adapter->hw);
- usleep_range(10000, 20000);
-
- /* Unhook test interrupt handler */
- free_irq(irq, netdev);
-
- return *data;
-}
-
-
-
-static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 reg_data;
-
- /* X540 needs to set the MACC.FLU bit to force link up */
- if (adapter->hw.mac.type == ixgbe_mac_X540) {
- reg_data = IXGBE_READ_REG(hw, IXGBE_MACC);
- reg_data |= IXGBE_MACC_FLU;
- IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data);
- }
-
- /* right now we only support MAC loopback in the driver */
- reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0);
- /* Setup MAC loopback */
- reg_data |= IXGBE_HLREG0_LPBK;
- IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data);
-
- reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE;
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data);
-
- reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC);
- reg_data &= ~IXGBE_AUTOC_LMS_MASK;
- reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU;
- IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data);
- IXGBE_WRITE_FLUSH(hw);
- usleep_range(10000, 20000);
-
- /* Disable Atlas Tx lanes; re-enabled in reset path */
- if (hw->mac.type == ixgbe_mac_82598EB) {
- u8 atlas;
-
- ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas);
- atlas |= IXGBE_ATLAS_PDN_TX_REG_EN;
- ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas);
-
- ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas);
- atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
- ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas);
-
- ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas);
- atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
- ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas);
-
- ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas);
- atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
- ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas);
- }
-
- return 0;
-}
-
-static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter)
-{
- u32 reg_data;
-
- reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0);
- reg_data &= ~IXGBE_HLREG0_LPBK;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data);
-}
-
-
-static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data)
-{
-
- //*data = ixgbe_setup_desc_rings(adapter);
- //if (*data)
- // goto out;
- *data = ixgbe_setup_loopback_test(adapter);
- if (*data)
- goto err_loopback;
- //*data = ixgbe_run_loopback_test(adapter);
- ixgbe_loopback_cleanup(adapter);
-
-err_loopback:
- //ixgbe_free_desc_rings(adapter);
-//out:
- return *data;
-
-}
-
-#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
-static int ixgbe_diag_test_count(struct net_device *netdev)
-{
- return IXGBE_TEST_LEN;
-}
-
-#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
-static void ixgbe_diag_test(struct net_device *netdev,
- struct ethtool_test *eth_test, u64 *data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- bool if_running = netif_running(netdev);
-
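- /* test results: data[0] registers, data[1] eeprom, data[2] interrupt,
- * data[3] loopback, data[4] link */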
- set_bit(__IXGBE_TESTING, &adapter->state);
- if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
- /* Offline tests */
-
- e_info(hw, "offline testing starting\n");
-
- /* Link test performed before hardware reset so autoneg doesn't
- * interfere with test result */
- if (ixgbe_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
- int i;
- for (i = 0; i < adapter->num_vfs; i++) {
- if (adapter->vfinfo[i].clear_to_send) {
- e_warn(drv, "Please take active VFS "
- "offline and restart the "
- "adapter before running NIC "
- "diagnostics\n");
- data[0] = 1;
- data[1] = 1;
- data[2] = 1;
- data[3] = 1;
- eth_test->flags |= ETH_TEST_FL_FAILED;
- clear_bit(__IXGBE_TESTING,
- &adapter->state);
- goto skip_ol_tests;
- }
- }
- }
-
- if (if_running)
- /* indicate we're in test mode */
- dev_close(netdev);
- else
- ixgbe_reset(adapter);
-
- e_info(hw, "register testing starting\n");
- if (ixgbe_reg_test(adapter, &data[0]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- ixgbe_reset(adapter);
- e_info(hw, "eeprom testing starting\n");
- if (ixgbe_eeprom_test(adapter, &data[1]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- ixgbe_reset(adapter);
- e_info(hw, "interrupt testing starting\n");
- if (ixgbe_intr_test(adapter, &data[2]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- /* If SRIOV or VMDq is enabled then skip MAC
- * loopback diagnostic. */
- if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
- IXGBE_FLAG_VMDQ_ENABLED)) {
- e_info(hw, "skip MAC loopback diagnostic in VT mode\n");
- data[3] = 0;
- goto skip_loopback;
- }
-
- ixgbe_reset(adapter);
- e_info(hw, "loopback testing starting\n");
- if (ixgbe_loopback_test(adapter, &data[3]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
-skip_loopback:
- ixgbe_reset(adapter);
-
- clear_bit(__IXGBE_TESTING, &adapter->state);
- if (if_running)
- dev_open(netdev);
- } else {
- e_info(hw, "online testing starting\n");
- /* Online tests */
- if (ixgbe_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
-
- /* Online tests aren't run; pass by default */
- data[0] = 0;
- data[1] = 0;
- data[2] = 0;
- data[3] = 0;
-
- clear_bit(__IXGBE_TESTING, &adapter->state);
- }
-skip_ol_tests:
- msleep_interruptible(4 * 1000);
-}
-
-static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
- struct ethtool_wolinfo *wol)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int retval = 1;
- u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
-
- /* WOL not supported except for the following */
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82599_SFP:
- /* Only these subdevices support WOL */
- switch (hw->subsystem_device_id) {
- case IXGBE_SUBDEV_ID_82599_560FLR:
- /* only support first port */
- if (hw->bus.func != 0) {
- wol->supported = 0;
- break;
- }
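- /* fall through */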
- case IXGBE_SUBDEV_ID_82599_SFP:
- retval = 0;
- break;
- default:
- wol->supported = 0;
- break;
- }
- break;
- case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
- /* All except this subdevice support WOL */
- if (hw->subsystem_device_id ==
- IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) {
- wol->supported = 0;
- break;
- }
- retval = 0;
- break;
- case IXGBE_DEV_ID_82599_KX4:
- retval = 0;
- break;
- case IXGBE_DEV_ID_X540T:
- /* check the EEPROM to see whether WOL is enabled for this port */
- if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
- ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
- (hw->bus.func == 0))) {
- retval = 0;
- break;
- }
-
- /* All others not supported */
- wol->supported = 0;
- break;
- default:
- wol->supported = 0;
- }
- return retval;
-}
-
-static void ixgbe_get_wol(struct net_device *netdev,
- struct ethtool_wolinfo *wol)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- wol->supported = WAKE_UCAST | WAKE_MCAST |
- WAKE_BCAST | WAKE_MAGIC;
- wol->wolopts = 0;
-
- if (ixgbe_wol_exclusion(adapter, wol) ||
- !device_can_wakeup(&adapter->pdev->dev))
- return;
-
- if (adapter->wol & IXGBE_WUFC_EX)
- wol->wolopts |= WAKE_UCAST;
- if (adapter->wol & IXGBE_WUFC_MC)
- wol->wolopts |= WAKE_MCAST;
- if (adapter->wol & IXGBE_WUFC_BC)
- wol->wolopts |= WAKE_BCAST;
- if (adapter->wol & IXGBE_WUFC_MAG)
- wol->wolopts |= WAKE_MAGIC;
-}
-
-static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
- return -EOPNOTSUPP;
-
- if (ixgbe_wol_exclusion(adapter, wol))
- return wol->wolopts ? -EOPNOTSUPP : 0;
-
- adapter->wol = 0;
-
- if (wol->wolopts & WAKE_UCAST)
- adapter->wol |= IXGBE_WUFC_EX;
- if (wol->wolopts & WAKE_MCAST)
- adapter->wol |= IXGBE_WUFC_MC;
- if (wol->wolopts & WAKE_BCAST)
- adapter->wol |= IXGBE_WUFC_BC;
- if (wol->wolopts & WAKE_MAGIC)
- adapter->wol |= IXGBE_WUFC_MAG;
-
- device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
-
- return 0;
-}
-
-static int ixgbe_nway_reset(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- if (netif_running(netdev))
- ixgbe_reinit_locked(adapter);
-
- return 0;
-}
-
-#ifdef HAVE_ETHTOOL_SET_PHYS_ID
-static int ixgbe_set_phys_id(struct net_device *netdev,
- enum ethtool_phys_id_state state)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
-
- switch (state) {
- case ETHTOOL_ID_ACTIVE:
- adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
- return 2;
-
- case ETHTOOL_ID_ON:
- hw->mac.ops.led_on(hw, IXGBE_LED_ON);
- break;
-
- case ETHTOOL_ID_OFF:
- hw->mac.ops.led_off(hw, IXGBE_LED_ON);
- break;
-
- case ETHTOOL_ID_INACTIVE:
- /* Restore LED settings */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg);
- break;
- }
-
- return 0;
-}
-#else
-static int ixgbe_phys_id(struct net_device *netdev, u32 data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
- u32 i;
-
- if (!data || data > 300)
- data = 300;
-
- for (i = 0; i < (data * 1000); i += 400) {
- ixgbe_led_on(hw, IXGBE_LED_ON);
- msleep_interruptible(200);
- ixgbe_led_off(hw, IXGBE_LED_ON);
- msleep_interruptible(200);
- }
-
- /* Restore LED settings */
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
-
- return 0;
-}
-#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
-
-static int ixgbe_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
-#ifndef CONFIG_IXGBE_NAPI
- ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit;
-#endif /* CONFIG_IXGBE_NAPI */
- /* only valid if in constant ITR mode */
- if (adapter->rx_itr_setting <= 1)
- ec->rx_coalesce_usecs = adapter->rx_itr_setting;
- else
- ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
-
- /* if in mixed tx/rx queues per vector mode, report only rx settings */
- if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
- return 0;
-
- /* only valid if in constant ITR mode */
- if (adapter->tx_itr_setting <= 1)
- ec->tx_coalesce_usecs = adapter->tx_itr_setting;
- else
- ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
-
- return 0;
-}
-
-/*
- * this function must be called before setting the new value of
- * rx_itr_setting
- */
-#ifdef NO_VNIC
-static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
-
- /* nothing to do if LRO or RSC are not enabled */
- if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) ||
- !(netdev->features & NETIF_F_LRO))
- return false;
-
- /* check the feature flag value and enable RSC if necessary */
- if (adapter->rx_itr_setting == 1 ||
- adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
- if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
- adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
- e_info(probe, "rx-usecs value high enough "
- "to re-enable RSC\n");
- return true;
- }
- /* if interrupt rate is too high then disable RSC */
- } else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
- adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
-#ifdef IXGBE_NO_LRO
- e_info(probe, "rx-usecs set too low, disabling RSC\n");
-#else
- e_info(probe, "rx-usecs set too low, "
- "falling back to software LRO\n");
-#endif
- return true;
- }
- return false;
-}
-#endif
-
-static int ixgbe_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *ec)
-{
-#ifdef NO_VNIC
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_q_vector *q_vector;
- int i;
- int num_vectors;
- u16 tx_itr_param, rx_itr_param;
- bool need_reset = false;
-
- /* don't accept tx specific changes if we've got mixed RxTx vectors */
- if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count
- && ec->tx_coalesce_usecs)
- return -EINVAL;
-
- if (ec->tx_max_coalesced_frames_irq)
- adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
-
-#ifndef CONFIG_IXGBE_NAPI
- if (ec->rx_max_coalesced_frames_irq)
- adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq;
-
-#endif
- if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
- (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
- return -EINVAL;
-
- if (ec->rx_coalesce_usecs > 1)
- adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
- else
- adapter->rx_itr_setting = ec->rx_coalesce_usecs;
-
- if (adapter->rx_itr_setting == 1)
- rx_itr_param = IXGBE_20K_ITR;
- else
- rx_itr_param = adapter->rx_itr_setting;
-
- if (ec->tx_coalesce_usecs > 1)
- adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
- else
- adapter->tx_itr_setting = ec->tx_coalesce_usecs;
-
- if (adapter->tx_itr_setting == 1)
- tx_itr_param = IXGBE_10K_ITR;
- else
- tx_itr_param = adapter->tx_itr_setting;
-
- /* check the old value and enable RSC if necessary */
- need_reset = ixgbe_update_rsc(adapter);
-
- if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
- num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- else
- num_vectors = 1;
-
- for (i = 0; i < num_vectors; i++) {
- q_vector = adapter->q_vector[i];
- q_vector->tx.work_limit = adapter->tx_work_limit;
- q_vector->rx.work_limit = adapter->rx_work_limit;
- if (q_vector->tx.count && !q_vector->rx.count)
- /* tx only */
- q_vector->itr = tx_itr_param;
- else
- /* rx only or mixed */
- q_vector->itr = rx_itr_param;
- ixgbe_write_eitr(q_vector);
- }
-
- /*
- * do reset here at the end to make sure EITR==0 case is handled
- * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
- * also locks in RSC enable/disable which requires reset
- */
- if (need_reset)
- ixgbe_do_reset(netdev);
-#endif
- return 0;
-}
-
-#ifndef HAVE_NDO_SET_FEATURES
-static u32 ixgbe_get_rx_csum(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_ring *ring = adapter->rx_ring[0];
- return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
-}
-
-static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- int i;
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- struct ixgbe_ring *ring = adapter->rx_ring[i];
- if (data)
- set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
- else
- clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
- }
-
- /* LRO and RSC both depend on RX checksum to function */
- if (!data && (netdev->features & NETIF_F_LRO)) {
- netdev->features &= ~NETIF_F_LRO;
-
- if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
- adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
- ixgbe_do_reset(netdev);
- }
- }
-
- return 0;
-}
-
-static u32 ixgbe_get_tx_csum(struct net_device *netdev)
-{
- return (netdev->features & NETIF_F_IP_CSUM) != 0;
-}
-
-static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- u32 feature_list;
-
-#ifdef NETIF_F_IPV6_CSUM
- feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-#else
- feature_list = NETIF_F_IP_CSUM;
-#endif
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- feature_list |= NETIF_F_SCTP_CSUM;
- break;
- default:
- break;
- }
- if (data)
- netdev->features |= feature_list;
- else
- netdev->features &= ~feature_list;
-
- return 0;
-}
-
-#ifdef NETIF_F_TSO
-static int ixgbe_set_tso(struct net_device *netdev, u32 data)
-{
- if (data) {
- netdev->features |= NETIF_F_TSO;
-#ifdef NETIF_F_TSO6
- netdev->features |= NETIF_F_TSO6;
-#endif
- } else {
-#ifndef HAVE_NETDEV_VLAN_FEATURES
-#ifdef NETIF_F_HW_VLAN_TX
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- /* disable TSO on all VLANs if they're present */
- if (adapter->vlgrp) {
- int i;
- struct net_device *v_netdev;
- for (i = 0; i < VLAN_N_VID; i++) {
- v_netdev =
- vlan_group_get_device(adapter->vlgrp, i);
- if (v_netdev) {
- v_netdev->features &= ~NETIF_F_TSO;
-#ifdef NETIF_F_TSO6
- v_netdev->features &= ~NETIF_F_TSO6;
-#endif
- vlan_group_set_device(adapter->vlgrp, i,
- v_netdev);
- }
- }
- }
-#endif
-#endif /* HAVE_NETDEV_VLAN_FEATURES */
- netdev->features &= ~NETIF_F_TSO;
-#ifdef NETIF_F_TSO6
- netdev->features &= ~NETIF_F_TSO6;
-#endif
- }
- return 0;
-}
-
-#endif /* NETIF_F_TSO */
-#ifdef ETHTOOL_GFLAGS
-static int ixgbe_set_flags(struct net_device *netdev, u32 data)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN;
- u32 changed = netdev->features ^ data;
- bool need_reset = false;
- int rc;
-
-#ifndef HAVE_VLAN_RX_REGISTER
- if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
- !(data & ETH_FLAG_RXVLAN))
- return -EINVAL;
-
-#endif
-#ifdef NETIF_F_RXHASH
- if (adapter->flags & IXGBE_FLAG_RSS_ENABLED)
- supported_flags |= ETH_FLAG_RXHASH;
-#endif
-#ifdef IXGBE_NO_LRO
- if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
-#endif
- supported_flags |= ETH_FLAG_LRO;
-
-#ifdef ETHTOOL_GRXRINGS
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_X540:
- case ixgbe_mac_82599EB:
- supported_flags |= ETH_FLAG_NTUPLE;
- default:
- break;
- }
-
-#endif
- rc = ethtool_op_set_flags(netdev, data, supported_flags);
- if (rc)
- return rc;
-
-#ifndef HAVE_VLAN_RX_REGISTER
- if (changed & ETH_FLAG_RXVLAN)
- ixgbe_vlan_mode(netdev, netdev->features);
-
-#endif
- /* if state changes we need to update adapter->flags and reset */
- if (!(netdev->features & NETIF_F_LRO)) {
- if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
- need_reset = true;
- adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
- } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
- !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
- if (adapter->rx_itr_setting == 1 ||
- adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
- adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
- need_reset = true;
- } else if (changed & ETH_FLAG_LRO) {
-#ifdef IXGBE_NO_LRO
- e_info(probe, "rx-usecs set too low, "
- "disabling RSC\n");
-#else
- e_info(probe, "rx-usecs set too low, "
- "falling back to software LRO\n");
-#endif
- }
- }
-
-#ifdef ETHTOOL_GRXRINGS
- /*
- * Check if Flow Director n-tuple support was enabled or disabled. If
- * the state changed, we need to reset.
- */
- if (!(netdev->features & NETIF_F_NTUPLE)) {
- if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
- /* turn off Flow Director, set ATR and reset */
- if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
- !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
- adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
- need_reset = true;
- }
- adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
- } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
- /* turn off ATR, enable perfect filters and reset */
- adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
- adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
- need_reset = true;
- }
-
-#endif /* ETHTOOL_GRXRINGS */
- if (need_reset)
- ixgbe_do_reset(netdev);
-
- return 0;
-}
-
-#endif /* ETHTOOL_GFLAGS */
-#endif /* HAVE_NDO_SET_FEATURES */
-#ifdef ETHTOOL_GRXRINGS
-static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *cmd)
-{
- union ixgbe_atr_input *mask = &adapter->fdir_mask;
- struct ethtool_rx_flow_spec *fsp =
- (struct ethtool_rx_flow_spec *)&cmd->fs;
- struct hlist_node *node, *node2;
- struct ixgbe_fdir_filter *rule = NULL;
-
- /* report total rule count */
- cmd->data = (1024 << adapter->fdir_pballoc) - 2;
-
- hlist_for_each_entry_safe(rule, node, node2,
- &adapter->fdir_filter_list, fdir_node) {
- if (fsp->location <= rule->sw_idx)
- break;
- }
-
- if (!rule || fsp->location != rule->sw_idx)
- return -EINVAL;
-
- /* fill out the flow spec entry */
-
- /* set flow type field */
- switch (rule->filter.formatted.flow_type) {
- case IXGBE_ATR_FLOW_TYPE_TCPV4:
- fsp->flow_type = TCP_V4_FLOW;
- break;
- case IXGBE_ATR_FLOW_TYPE_UDPV4:
- fsp->flow_type = UDP_V4_FLOW;
- break;
- case IXGBE_ATR_FLOW_TYPE_SCTPV4:
- fsp->flow_type = SCTP_V4_FLOW;
- break;
- case IXGBE_ATR_FLOW_TYPE_IPV4:
- fsp->flow_type = IP_USER_FLOW;
- fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
- fsp->h_u.usr_ip4_spec.proto = 0;
- fsp->m_u.usr_ip4_spec.proto = 0;
- break;
- default:
- return -EINVAL;
- }
-
- fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
- fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
- fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
- fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
- fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
- fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
- fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
- fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
- fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
- fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
- fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
- fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
- fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
- fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
- fsp->flow_type |= FLOW_EXT;
-
- /* record action */
- if (rule->action == IXGBE_FDIR_DROP_QUEUE)
- fsp->ring_cookie = RX_CLS_FLOW_DISC;
- else
- fsp->ring_cookie = rule->action;
-
- return 0;
-}
-
-static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *cmd,
- u32 *rule_locs)
-{
- struct hlist_node *node, *node2;
- struct ixgbe_fdir_filter *rule;
- int cnt = 0;
-
- /* report total rule count */
- cmd->data = (1024 << adapter->fdir_pballoc) - 2;
-
- hlist_for_each_entry_safe(rule, node, node2,
- &adapter->fdir_filter_list, fdir_node) {
- if (cnt == cmd->rule_cnt)
- return -EMSGSIZE;
- rule_locs[cnt] = rule->sw_idx;
- cnt++;
- }
-
- cmd->rule_cnt = cnt;
-
- return 0;
-}
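A quick note on the capacity arithmetic used by the two ethtool helpers above (an editorial sketch; the variable names below are illustrative and not part of the deleted driver): the reported rule space depends only on the fdir_pballoc setting.

/* illustrative only -- how the reported FDIR capacity scales with pballoc */
u32 fdir_pballoc = 2;                        /* example setting */
u32 max_rules = (1024 << fdir_pballoc) - 2;  /* 4096 - 2 = 4094 locations */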
-
-static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *cmd)
-{
- cmd->data = 0;
-
- /* if RSS is disabled then report no hashing */
- if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
- return 0;
-
- /* Report default options for RSS on ixgbe */
- switch (cmd->flow_type) {
- case TCP_V4_FLOW:
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- case UDP_V4_FLOW:
- if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- case SCTP_V4_FLOW:
- case AH_ESP_V4_FLOW:
- case AH_V4_FLOW:
- case ESP_V4_FLOW:
- case IPV4_FLOW:
- cmd->data |= RXH_IP_SRC | RXH_IP_DST;
- break;
- case TCP_V6_FLOW:
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- case UDP_V6_FLOW:
- if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
- cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- case SCTP_V6_FLOW:
- case AH_ESP_V6_FLOW:
- case AH_V6_FLOW:
- case ESP_V6_FLOW:
- case IPV6_FLOW:
- cmd->data |= RXH_IP_SRC | RXH_IP_DST;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
-#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
- void *rule_locs)
-#else
- u32 *rule_locs)
-#endif
-{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (cmd->cmd) {
- case ETHTOOL_GRXRINGS:
- cmd->data = adapter->num_rx_queues;
- ret = 0;
- break;
- case ETHTOOL_GRXCLSRLCNT:
- cmd->rule_cnt = adapter->fdir_filter_count;
- ret = 0;
- break;
- case ETHTOOL_GRXCLSRULE:
- ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
- break;
- case ETHTOOL_GRXCLSRLALL:
- ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
- rule_locs);
- break;
- case ETHTOOL_GRXFH:
- ret = ixgbe_get_rss_hash_opts(adapter, cmd);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
- struct ixgbe_fdir_filter *input,
- u16 sw_idx)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct hlist_node *node, *node2, *parent;
- struct ixgbe_fdir_filter *rule;
- int err = -EINVAL;
-
- parent = NULL;
- rule = NULL;
-
- hlist_for_each_entry_safe(rule, node, node2,
- &adapter->fdir_filter_list, fdir_node) {
- /* hash found, or no matching entry */
- if (rule->sw_idx >= sw_idx)
- break;
- parent = node;
- }
-
- /* if there is an old rule occupying our place remove it */
- if (rule && (rule->sw_idx == sw_idx)) {
- if (!input || (rule->filter.formatted.bkt_hash !=
- input->filter.formatted.bkt_hash)) {
- err = ixgbe_fdir_erase_perfect_filter_82599(hw,
- &rule->filter,
- sw_idx);
- }
-
- hlist_del(&rule->fdir_node);
- kfree(rule);
- adapter->fdir_filter_count--;
- }
-
- /*
-	 * If no input, this was a delete; err should be 0 if a rule was
-	 * successfully found and removed from the list, else -EINVAL.
- */
- if (!input)
- return err;
-
- /* initialize node and set software index */
- INIT_HLIST_NODE(&input->fdir_node);
-
- /* add filter to the list */
- if (parent)
- hlist_add_after(parent, &input->fdir_node);
- else
- hlist_add_head(&input->fdir_node,
- &adapter->fdir_filter_list);
-
- /* update counts */
- adapter->fdir_filter_count++;
-
- return 0;
-}
-
-static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
- u8 *flow_type)
-{
- switch (fsp->flow_type & ~FLOW_EXT) {
- case TCP_V4_FLOW:
- *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
- break;
- case UDP_V4_FLOW:
- *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
- break;
- case SCTP_V4_FLOW:
- *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
- break;
- case IP_USER_FLOW:
- switch (fsp->h_u.usr_ip4_spec.proto) {
- case IPPROTO_TCP:
- *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
- break;
- case IPPROTO_UDP:
- *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
- break;
- case IPPROTO_SCTP:
- *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
- break;
- case 0:
- if (!fsp->m_u.usr_ip4_spec.proto) {
- *flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
- break;
- }
- default:
- return 0;
- }
- break;
- default:
- return 0;
- }
-
- return 1;
-}
-
-static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *cmd)
-{
- struct ethtool_rx_flow_spec *fsp =
- (struct ethtool_rx_flow_spec *)&cmd->fs;
- struct ixgbe_hw *hw = &adapter->hw;
- struct ixgbe_fdir_filter *input;
- union ixgbe_atr_input mask;
- int err;
-
- if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
- return -EOPNOTSUPP;
-
- /*
- * Don't allow programming if the action is a queue greater than
- * the number of online Rx queues.
- */
- if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
- (fsp->ring_cookie >= adapter->num_rx_queues))
- return -EINVAL;
-
- /* Don't allow indexes to exist outside of available space */
- if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
- e_err(drv, "Location out of range\n");
- return -EINVAL;
- }
-
- input = kzalloc(sizeof(*input), GFP_ATOMIC);
- if (!input)
- return -ENOMEM;
-
- memset(&mask, 0, sizeof(union ixgbe_atr_input));
-
- /* set SW index */
- input->sw_idx = fsp->location;
-
- /* record flow type */
- if (!ixgbe_flowspec_to_flow_type(fsp,
- &input->filter.formatted.flow_type)) {
- e_err(drv, "Unrecognized flow type\n");
- goto err_out;
- }
-
- mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
- IXGBE_ATR_L4TYPE_MASK;
-
- if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
- mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
-
- /* Copy input into formatted structures */
- input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
- mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
- input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
- mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
- input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
- mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
- input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
- mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
-
- if (fsp->flow_type & FLOW_EXT) {
- input->filter.formatted.vm_pool =
- (unsigned char)ntohl(fsp->h_ext.data[1]);
- mask.formatted.vm_pool =
- (unsigned char)ntohl(fsp->m_ext.data[1]);
- input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
- mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
- input->filter.formatted.flex_bytes =
- fsp->h_ext.vlan_etype;
- mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
- }
-
- /* determine if we need to drop or route the packet */
- if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
- input->action = IXGBE_FDIR_DROP_QUEUE;
- else
- input->action = fsp->ring_cookie;
-
- spin_lock(&adapter->fdir_perfect_lock);
-
- if (hlist_empty(&adapter->fdir_filter_list)) {
- /* save mask and program input mask into HW */
- memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
- err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
- if (err) {
- e_err(drv, "Error writing mask\n");
- goto err_out_w_lock;
- }
- } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
- e_err(drv, "Only one mask supported per port\n");
- goto err_out_w_lock;
- }
-
- /* apply mask and compute/store hash */
- ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
-
- /* program filters to filter memory */
- err = ixgbe_fdir_write_perfect_filter_82599(hw,
- &input->filter, input->sw_idx,
- (input->action == IXGBE_FDIR_DROP_QUEUE) ?
- IXGBE_FDIR_DROP_QUEUE :
- adapter->rx_ring[input->action]->reg_idx);
- if (err)
- goto err_out_w_lock;
-
- ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
-
- spin_unlock(&adapter->fdir_perfect_lock);
-
- kfree(input);
- return err;
-err_out_w_lock:
- spin_unlock(&adapter->fdir_perfect_lock);
-err_out:
- kfree(input);
- return -EINVAL;
-}
-
-static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *cmd)
-{
- struct ethtool_rx_flow_spec *fsp =
- (struct ethtool_rx_flow_spec *)&cmd->fs;
- int err;
-
- spin_lock(&adapter->fdir_perfect_lock);
- err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location));
- spin_unlock(&adapter->fdir_perfect_lock);
-
- return err;
-}
-
-#ifdef ETHTOOL_SRXNTUPLE
-/*
- * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid
- * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag
- * was defined that this function was present.
- */
-static int ixgbe_set_rx_ntuple(struct net_device *dev,
- struct ethtool_rx_ntuple *cmd)
-{
- return -EOPNOTSUPP;
-}
-
-#endif
-#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
- IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
-static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
- struct ethtool_rxnfc *nfc)
-{
- u32 flags2 = adapter->flags2;
-
- /*
- * RSS does not support anything other than hashing
- * to queues on src and dst IPs and ports
- */
- if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
- RXH_L4_B_0_1 | RXH_L4_B_2_3))
- return -EINVAL;
-
- switch (nfc->flow_type) {
- case TCP_V4_FLOW:
- case TCP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST) ||
- !(nfc->data & RXH_L4_B_0_1) ||
- !(nfc->data & RXH_L4_B_2_3))
- return -EINVAL;
- break;
- case UDP_V4_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST))
- return -EINVAL;
- switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- case 0:
- flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
- break;
- case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
- flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
- break;
- default:
- return -EINVAL;
- }
- break;
- case UDP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST))
- return -EINVAL;
- switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- case 0:
- flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
- break;
- case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
- flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
- break;
- default:
- return -EINVAL;
- }
- break;
- case AH_ESP_V4_FLOW:
- case AH_V4_FLOW:
- case ESP_V4_FLOW:
- case SCTP_V4_FLOW:
- case AH_ESP_V6_FLOW:
- case AH_V6_FLOW:
- case ESP_V6_FLOW:
- case SCTP_V6_FLOW:
- if (!(nfc->data & RXH_IP_SRC) ||
- !(nfc->data & RXH_IP_DST) ||
- (nfc->data & RXH_L4_B_0_1) ||
- (nfc->data & RXH_L4_B_2_3))
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- /* if we changed something we need to update flags */
- if (flags2 != adapter->flags2) {
- struct ixgbe_hw *hw = &adapter->hw;
- u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
-
- if ((flags2 & UDP_RSS_FLAGS) &&
- !(adapter->flags2 & UDP_RSS_FLAGS))
- e_warn(drv, "enabling UDP RSS: fragmented packets"
- " may arrive out of order to the stack above\n");
-
- adapter->flags2 = flags2;
-
- /* Perform hash on these packet types */
- mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
- | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
- | IXGBE_MRQC_RSS_FIELD_IPV6
- | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
-
- mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
- IXGBE_MRQC_RSS_FIELD_IPV6_UDP);
-
- if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
- mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
-
- if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
- mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
-
- IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
- }
-
- return 0;
-}
-
-static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
-{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
- int ret = -EOPNOTSUPP;
-
- switch (cmd->cmd) {
- case ETHTOOL_SRXCLSRLINS:
- ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
- break;
- case ETHTOOL_SRXCLSRLDEL:
- ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
- break;
- case ETHTOOL_SRXFH:
- ret = ixgbe_set_rss_hash_opt(adapter, cmd);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-#endif /* ETHTOOL_GRXRINGS */
-//static
-struct ethtool_ops ixgbe_ethtool_ops = {
- .get_settings = ixgbe_get_settings,
- .set_settings = ixgbe_set_settings,
- .get_drvinfo = ixgbe_get_drvinfo,
- .get_regs_len = ixgbe_get_regs_len,
- .get_regs = ixgbe_get_regs,
- .get_wol = ixgbe_get_wol,
- .set_wol = ixgbe_set_wol,
- .nway_reset = ixgbe_nway_reset,
- .get_link = ethtool_op_get_link,
- .get_eeprom_len = ixgbe_get_eeprom_len,
- .get_eeprom = ixgbe_get_eeprom,
- .set_eeprom = ixgbe_set_eeprom,
- .get_ringparam = ixgbe_get_ringparam,
- .set_ringparam = ixgbe_set_ringparam,
- .get_pauseparam = ixgbe_get_pauseparam,
- .set_pauseparam = ixgbe_set_pauseparam,
- .get_msglevel = ixgbe_get_msglevel,
- .set_msglevel = ixgbe_set_msglevel,
-#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
- .self_test_count = ixgbe_diag_test_count,
-#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
- .self_test = ixgbe_diag_test,
- .get_strings = ixgbe_get_strings,
-#ifdef HAVE_ETHTOOL_SET_PHYS_ID
- .set_phys_id = ixgbe_set_phys_id,
-#else
- .phys_id = ixgbe_phys_id,
-#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
-#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
- .get_stats_count = ixgbe_get_stats_count,
-#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
- .get_sset_count = ixgbe_get_sset_count,
-#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
- .get_ethtool_stats = ixgbe_get_ethtool_stats,
-#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
- .get_perm_addr = ethtool_op_get_perm_addr,
-#endif
- .get_coalesce = ixgbe_get_coalesce,
- .set_coalesce = ixgbe_set_coalesce,
-#ifndef HAVE_NDO_SET_FEATURES
- .get_rx_csum = ixgbe_get_rx_csum,
- .set_rx_csum = ixgbe_set_rx_csum,
- .get_tx_csum = ixgbe_get_tx_csum,
- .set_tx_csum = ixgbe_set_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .set_sg = ethtool_op_set_sg,
-#ifdef NETIF_F_TSO
- .get_tso = ethtool_op_get_tso,
- .set_tso = ixgbe_set_tso,
-#endif
-#ifdef ETHTOOL_GFLAGS
- .get_flags = ethtool_op_get_flags,
- .set_flags = ixgbe_set_flags,
-#endif
-#endif /* HAVE_NDO_SET_FEATURES */
-#ifdef ETHTOOL_GRXRINGS
- .get_rxnfc = ixgbe_get_rxnfc,
- .set_rxnfc = ixgbe_set_rxnfc,
-#ifdef ETHTOOL_SRXNTUPLE
- .set_rx_ntuple = ixgbe_set_rx_ntuple,
-#endif
-#endif
-};
-
-void ixgbe_set_ethtool_ops(struct net_device *netdev)
-{
- SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
-}
-#endif /* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
deleted file mode 100644
index eec86cbb..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_FCOE_H
-#define _IXGBE_FCOE_H
-
-#ifdef IXGBE_FCOE
-
-#include <scsi/fc/fc_fs.h>
-#include <scsi/fc/fc_fcoe.h>
-
-/* shift bits within STAT for FCSTAT */
-#define IXGBE_RXDADV_FCSTAT_SHIFT 4
-
-/* ddp user buffer */
-#define IXGBE_BUFFCNT_MAX 256 /* 8 bits bufcnt */
-#define IXGBE_FCPTR_ALIGN 16
-#define IXGBE_FCPTR_MAX (IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t))
-#define IXGBE_FCBUFF_4KB 0x0
-#define IXGBE_FCBUFF_8KB 0x1
-#define IXGBE_FCBUFF_16KB 0x2
-#define IXGBE_FCBUFF_64KB 0x3
-#define IXGBE_FCBUFF_MAX 65536 /* 64KB max */
-#define IXGBE_FCBUFF_MIN 4096 /* 4KB min */
-#define IXGBE_FCOE_DDP_MAX 512 /* 9 bits xid */
-
-/* Default traffic class to use for FCoE */
-#define IXGBE_FCOE_DEFTC 3
-
-/* fcerr */
-#define IXGBE_FCERR_BADCRC 0x00100000
-#define IXGBE_FCERR_EOFSOF 0x00200000
-#define IXGBE_FCERR_NOFIRST 0x00300000
-#define IXGBE_FCERR_OOOSEQ 0x00400000
-#define IXGBE_FCERR_NODMA 0x00500000
-#define IXGBE_FCERR_PKTLOST 0x00600000
-
-/* FCoE DDP for target mode */
-#define __IXGBE_FCOE_TARGET 1
-
-struct ixgbe_fcoe_ddp {
- int len;
- u32 err;
- unsigned int sgc;
- struct scatterlist *sgl;
- dma_addr_t udp;
- u64 *udl;
- struct pci_pool *pool;
-};
-
-struct ixgbe_fcoe {
- struct pci_pool **pool;
- atomic_t refcnt;
- spinlock_t lock;
- struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
- unsigned char *extra_ddp_buffer;
- dma_addr_t extra_ddp_buffer_dma;
- u64 __percpu *pcpu_noddp;
- u64 __percpu *pcpu_noddp_ext_buff;
- unsigned long mode;
- u8 tc;
- u8 up;
- u8 up_set;
-};
-#endif /* IXGBE_FCOE */
-
-#endif /* _IXGBE_FCOE_H */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
deleted file mode 100644
index a5acf19c..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
+++ /dev/null
@@ -1,2951 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/******************************************************************************
- Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
-******************************************************************************/
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <linux/string.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#ifdef HAVE_SCTP
-#include <linux/sctp.h>
-#endif
-#include <linux/pkt_sched.h>
-#include <linux/ipv6.h>
-#ifdef NETIF_F_TSO
-#include <net/checksum.h>
-#ifdef NETIF_F_TSO6
-#include <net/ip6_checksum.h>
-#endif
-#endif
-#ifdef SIOCETHTOOL
-#include <linux/ethtool.h>
-#endif
-
-#include "ixgbe.h"
-
-#undef CONFIG_DCA
-#undef CONFIG_DCA_MODULE
-
-char ixgbe_driver_name[] = "ixgbe";
-#define DRV_HW_PERF
-
-#ifndef CONFIG_IXGBE_NAPI
-#define DRIVERNAPI
-#else
-#define DRIVERNAPI "-NAPI"
-#endif
-
-#define FPGA
-
-#define VMDQ_TAG
-
-#define MAJ 3
-#define MIN 9
-#define BUILD 17
-#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
- __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG
-const char ixgbe_driver_version[] = DRV_VERSION;
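For readers tracing the version string (editorial note, based only on the macros above): __stringify() turns each numeric component into a string literal, and the empty DRV_HW_PERF, FPGA and VMDQ_TAG macros contribute nothing, so the concatenation reduces to:

/* illustrative expansion, assuming CONFIG_IXGBE_NAPI is set:
 *   DRV_VERSION -> "3" "." "9" "." "17" "-NAPI" -> "3.9.17-NAPI"
 * without CONFIG_IXGBE_NAPI the suffix is simply dropped -> "3.9.17"
 */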
-
-/* ixgbe_pci_tbl - PCI Device ID Table
- *
- * Wildcard entries (PCI_ANY_ID) should come last
- * Last entry must be all 0s
- *
- * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
- * Class, Class Mask, private data (not used) }
- */
-const struct pci_device_id ixgbe_pci_tbl[] = {
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)},
- {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)},
- /* required last entry */
- {0, }
-};
-
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
-static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
- void *p);
-static struct notifier_block dca_notifier = {
- .notifier_call = ixgbe_notify_dca,
- .next = NULL,
- .priority = 0
-};
-
-#endif
-MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
-MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-
-#define DEFAULT_DEBUG_LEVEL_SHIFT 3
-
-
-static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
-{
- u32 ctrl_ext;
-
- /* Let firmware take over control of h/w */
- ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
- ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
-}
-
-#ifdef NO_VNIC
-static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
-{
- u32 ctrl_ext;
-
- /* Let firmware know the driver has taken over */
- ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
- ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
-}
-#endif
-
-
-static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct ixgbe_hw_stats *hwstats = &adapter->stats;
- int i;
- u32 data;
-
- if ((hw->fc.current_mode != ixgbe_fc_full) &&
- (hw->fc.current_mode != ixgbe_fc_rx_pause))
- return;
-
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
- break;
- default:
- data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
- }
- hwstats->lxoffrxc += data;
-
- /* refill credits (no tx hang) if we received xoff */
- if (!data)
- return;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- clear_bit(__IXGBE_HANG_CHECK_ARMED,
- &adapter->tx_ring[i]->state);
-}
-
-static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct ixgbe_hw_stats *hwstats = &adapter->stats;
- u32 xoff[8] = {0};
- int i;
- bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
-
-#ifdef HAVE_DCBNL_IEEE
- if (adapter->ixgbe_ieee_pfc)
- pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
-
-#endif
- if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
- ixgbe_update_xoff_rx_lfc(adapter);
- return;
- }
-
- /* update stats for each tc, only valid with PFC enabled */
- for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
- break;
- default:
- xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
- }
- hwstats->pxoffrxc[i] += xoff[i];
- }
-
- /* disarm tx queues that have received xoff frames */
- for (i = 0; i < adapter->num_tx_queues; i++) {
- struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
- u8 tc = tx_ring->dcb_tc;
-
- if ((tc <= 7) && (xoff[tc]))
- clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
- }
-}
-
-
-
-
-#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-
-
-
-
-#ifdef HAVE_8021P_SUPPORT
-/**
- * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping
- * @adapter: driver data
- */
-void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 vlnctrl;
- int i;
-
- /* leave vlan tag stripping enabled for DCB */
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
- return;
-
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
- vlnctrl &= ~IXGBE_VLNCTRL_VME;
- IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- for (i = 0; i < adapter->num_rx_queues; i++) {
- u8 reg_idx = adapter->rx_ring[i]->reg_idx;
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
- vlnctrl &= ~IXGBE_RXDCTL_VME;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
- }
- break;
- default:
- break;
- }
-}
-
-#endif
-/**
- * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping
- * @adapter: driver data
- */
-void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 vlnctrl;
- int i;
-
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
- vlnctrl |= IXGBE_VLNCTRL_VME;
- IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- for (i = 0; i < adapter->num_rx_queues; i++) {
- u8 reg_idx = adapter->rx_ring[i]->reg_idx;
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
- vlnctrl |= IXGBE_RXDCTL_VME;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
- }
- break;
- default:
- break;
- }
-}
-
-#ifdef HAVE_VLAN_RX_REGISTER
-void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp)
-#else
-void ixgbe_vlan_mode(struct net_device *netdev, u32 features)
-#endif
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-#ifdef HAVE_8021P_SUPPORT
- bool enable;
-#endif
-#ifdef HAVE_VLAN_RX_REGISTER
-
- //if (!test_bit(__IXGBE_DOWN, &adapter->state))
- // ixgbe_irq_disable(adapter);
-
- adapter->vlgrp = grp;
-
- //if (!test_bit(__IXGBE_DOWN, &adapter->state))
- // ixgbe_irq_enable(adapter, true, true);
-#endif
-#ifdef HAVE_8021P_SUPPORT
-#ifdef HAVE_VLAN_RX_REGISTER
- enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED));
-#else
- enable = !!(features & NETIF_F_HW_VLAN_RX);
-#endif
- if (enable)
- /* enable VLAN tag insert/strip */
- ixgbe_vlan_stripping_enable(adapter);
- else
- /* disable VLAN tag insert/strip */
- ixgbe_vlan_stripping_disable(adapter);
-
-#endif
-}
-
-static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq)
-{
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- struct netdev_hw_addr *mc_ptr;
-#else
- struct dev_mc_list *mc_ptr;
-#endif
- struct ixgbe_adapter *adapter = hw->back;
- u8 *addr = *mc_addr_ptr;
-
- *vmdq = adapter->num_vfs;
-
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]);
- if (mc_ptr->list.next) {
- struct netdev_hw_addr *ha;
-
- ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list);
- *mc_addr_ptr = ha->addr;
- }
-#else
- mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]);
- if (mc_ptr->next)
- *mc_addr_ptr = mc_ptr->next->dmi_addr;
-#endif
- else
- *mc_addr_ptr = NULL;
-
- return addr;
-}
-
-/**
- * ixgbe_write_mc_addr_list - write multicast addresses to MTA
- * @netdev: network interface device structure
- *
- * Writes multicast address list to the MTA hash table.
- * Returns: -ENOMEM on failure
- * 0 on no addresses written
- * X on writing X addresses to MTA
- **/
-int ixgbe_write_mc_addr_list(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- struct netdev_hw_addr *ha;
-#endif
- u8 *addr_list = NULL;
- int addr_count = 0;
-
- if (!hw->mac.ops.update_mc_addr_list)
- return -ENOMEM;
-
- if (!netif_running(netdev))
- return 0;
-
-
- hw->mac.ops.update_mc_addr_list(hw, NULL, 0,
- ixgbe_addr_list_itr, true);
-
- if (!netdev_mc_empty(netdev)) {
-#ifdef NETDEV_HW_ADDR_T_MULTICAST
- ha = list_first_entry(&netdev->mc.list,
- struct netdev_hw_addr, list);
- addr_list = ha->addr;
-#else
- addr_list = netdev->mc_list->dmi_addr;
-#endif
- addr_count = netdev_mc_count(netdev);
-
- hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count,
- ixgbe_addr_list_itr, false);
- }
-
-#ifdef CONFIG_PCI_IOV
- //ixgbe_restore_vf_multicasts(adapter);
-#endif
- return addr_count;
-}
-
-
-void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int i;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) {
- hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr,
- adapter->mac_table[i].queue,
- IXGBE_RAH_AV);
- } else {
- hw->mac.ops.clear_rar(hw, i);
- }
- }
-}
-
-void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int i;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) {
- if (adapter->mac_table[i].state &
- IXGBE_MAC_STATE_IN_USE) {
- hw->mac.ops.set_rar(hw, i,
- adapter->mac_table[i].addr,
- adapter->mac_table[i].queue,
- IXGBE_RAH_AV);
- } else {
- hw->mac.ops.clear_rar(hw, i);
- }
- adapter->mac_table[i].state &=
- ~(IXGBE_MAC_STATE_MODIFIED);
- }
- }
-}
-
-int ixgbe_available_rars(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int i, count = 0;
-
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state == 0)
- count++;
- }
- return count;
-}
-
-int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int i;
-
- if (is_zero_ether_addr(addr))
- return 0;
-
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
- continue;
- adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED |
- IXGBE_MAC_STATE_IN_USE);
- memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
- adapter->mac_table[i].queue = queue;
- ixgbe_sync_mac_table(adapter);
- return i;
- }
- return -ENOMEM;
-}
-
-void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
-{
- int i;
- struct ixgbe_hw *hw = &adapter->hw;
-
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
- adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
- adapter->mac_table[i].queue = 0;
- }
- ixgbe_sync_mac_table(adapter);
-}
-
-void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index)
-{
- adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED;
- adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE;
- memset(adapter->mac_table[index].addr, 0, ETH_ALEN);
- adapter->mac_table[index].queue = 0;
- ixgbe_sync_mac_table(adapter);
-}
-
-int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue)
-{
- /* search table for addr, if found, set to 0 and sync */
- int i;
- struct ixgbe_hw *hw = &adapter->hw;
-
- if (is_zero_ether_addr(addr))
- return 0;
- for (i = 0; i < hw->mac.num_rar_entries; i++) {
- if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
- adapter->mac_table[i].queue == queue) {
- adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
- adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
- adapter->mac_table[i].queue = 0;
- ixgbe_sync_mac_table(adapter);
- return 0;
- }
- }
- return -ENOMEM;
-}
-#ifdef HAVE_SET_RX_MODE
-/**
- * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- * 0 on no addresses written
- * X on writing X addresses to the RAR table
- **/
-int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
- struct net_device *netdev, unsigned int vfn)
-{
- int count = 0;
-
- /* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
- return -ENOMEM;
-
- if (!netdev_uc_empty(netdev)) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- struct netdev_hw_addr *ha;
-#else
- struct dev_mc_list *ha;
-#endif
- netdev_for_each_uc_addr(ha, netdev) {
-#ifdef NETDEV_HW_ADDR_T_UNICAST
- ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn);
- ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn);
-#else
- ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn);
- ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn);
-#endif
- count++;
- }
- }
- return count;
-}
-
-#endif
-/**
- * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
- * @netdev: network interface device structure
- *
- * The set_rx_mode entry point is called whenever the unicast/multicast
- * address list or the network interface flags are updated. This routine is
- * responsible for configuring the hardware for proper unicast, multicast and
- * promiscuous mode.
- **/
-void ixgbe_set_rx_mode(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
- u32 vlnctrl;
- int count;
-
- /* Check for Promiscuous and All Multicast modes */
- fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-
- /* set all bits that we expect to always be set */
- fctrl |= IXGBE_FCTRL_BAM;
- fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
- fctrl |= IXGBE_FCTRL_PMCF;
-
- /* clear the bits we are changing the status of */
- fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
- vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
-
- if (netdev->flags & IFF_PROMISC) {
- hw->addr_ctrl.user_set_promisc = true;
- fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
- vmolr |= IXGBE_VMOLR_MPE;
- } else {
- if (netdev->flags & IFF_ALLMULTI) {
- fctrl |= IXGBE_FCTRL_MPE;
- vmolr |= IXGBE_VMOLR_MPE;
- } else {
- /*
-			 * Write addresses to the MTA; if the attempt fails,
-			 * just turn on promiscuous mode so that we can at
-			 * least receive multicast traffic.
- */
- count = ixgbe_write_mc_addr_list(netdev);
- if (count < 0) {
- fctrl |= IXGBE_FCTRL_MPE;
- vmolr |= IXGBE_VMOLR_MPE;
- } else if (count) {
- vmolr |= IXGBE_VMOLR_ROMPE;
- }
- }
-#ifdef NETIF_F_HW_VLAN_TX
- /* enable hardware vlan filtering */
- vlnctrl |= IXGBE_VLNCTRL_VFE;
-#endif
- hw->addr_ctrl.user_set_promisc = false;
-#ifdef HAVE_SET_RX_MODE
- /*
-		 * Write addresses to available RAR registers; if there is not
-		 * sufficient space to store all the addresses, enable
-		 * unicast promiscuous mode.
- */
- count = ixgbe_write_uc_addr_list(adapter, netdev,
- adapter->num_vfs);
- if (count < 0) {
- fctrl |= IXGBE_FCTRL_UPE;
- vmolr |= IXGBE_VMOLR_ROPE;
- }
-#endif
- }
-
- if (hw->mac.type != ixgbe_mac_82598EB) {
- vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) &
- ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
- IXGBE_VMOLR_ROPE);
- IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr);
- }
-
- IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
-}
-
-
-
-
-
-
-
-
-/* Additional bittime to account for IXGBE framing */
-#define IXGBE_ETH_FRAMING 20
-
-/*
- * ixgbe_hpbthresh - calculate high water mark for flow control
- *
- * @adapter: board private structure to calculate for
- * @pb - packet buffer to calculate
- */
-static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct net_device *dev = adapter->netdev;
- int link, tc, kb, marker;
- u32 dv_id, rx_pba;
-
- /* Calculate max LAN frame size */
- tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
-
-#ifdef IXGBE_FCOE
- /* FCoE traffic class uses FCOE jumbo frames */
- if (dev->features & NETIF_F_FCOE_MTU) {
- int fcoe_pb = 0;
-
- fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
-
- if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
- tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
- }
-#endif
-
- /* Calculate delay value for device */
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- dv_id = IXGBE_DV_X540(link, tc);
- break;
- default:
- dv_id = IXGBE_DV(link, tc);
- break;
- }
-
- /* Loopback switch introduces additional latency */
- if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
- dv_id += IXGBE_B2BT(tc);
-
-	/* Delay value is calculated in bit times; convert to KB */
- kb = IXGBE_BT2KB(dv_id);
- rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
-
- marker = rx_pba - kb;
-
- /* It is possible that the packet buffer is not large enough
-	 * to provide the required headroom. In this case throw an error
-	 * to the user and do the best we can.
- */
- if (marker < 0) {
-		e_warn(drv, "Packet Buffer(%i) cannot provide enough "
-		       "headroom to support flow control. "
-		       "Decrease MTU or number of traffic classes\n", pb);
- marker = tc + 1;
- }
-
- return marker;
-}
-
-/*
- * ixgbe_lpbthresh - calculate low water mark for flow control
- *
- * @adapter: board private structure to calculate for
- * @pb - packet buffer to calculate
- */
-static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct net_device *dev = adapter->netdev;
- int tc;
- u32 dv_id;
-
- /* Calculate max LAN frame size */
- tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
-
-#ifdef IXGBE_FCOE
- /* FCoE traffic class uses FCOE jumbo frames */
- if (dev->features & NETIF_F_FCOE_MTU) {
- int fcoe_pb = 0;
-
- fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
-
- if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
- tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
- }
-#endif
-
- /* Calculate delay value for device */
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- dv_id = IXGBE_LOW_DV_X540(tc);
- break;
- default:
- dv_id = IXGBE_LOW_DV(tc);
- break;
- }
-
-	/* Delay value is calculated in bit times; convert to KB */
- return IXGBE_BT2KB(dv_id);
-}
-
-/*
- * ixgbe_pbthresh_setup - calculate and setup high low water marks
- */
-static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int num_tc = netdev_get_num_tc(adapter->netdev);
- int i;
-
- if (!num_tc)
- num_tc = 1;
- if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS)
- num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS;
-
- for (i = 0; i < num_tc; i++) {
- hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
- hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
-
- /* Low water marks must not be larger than high water marks */
- if (hw->fc.low_water[i] > hw->fc.high_water[i])
- hw->fc.low_water[i] = 0;
- }
-
- for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++)
- hw->fc.high_water[i] = 0;
-}
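Summarizing the three flow-control helpers above (editorial summary; the pseudo-formulas reuse the macros as they appear in the code and ignore the X540 and SR-IOV adjustments):

/* illustrative only:
 *   high_water[tc] = RXPBSIZE(tc) / 1024 - IXGBE_BT2KB(IXGBE_DV(link, tc))
 *   low_water[tc]  = IXGBE_BT2KB(IXGBE_LOW_DV(tc))
 * and low_water[tc] is forced to 0 whenever it would exceed high_water[tc].
 */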
-
-
-
-#ifdef NO_VNIC
-static void ixgbe_configure(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
-
- ixgbe_configure_pb(adapter);
- ixgbe_configure_dcb(adapter);
-
- ixgbe_set_rx_mode(adapter->netdev);
-#ifdef NETIF_F_HW_VLAN_TX
- ixgbe_restore_vlan(adapter);
-#endif
-
-#ifdef IXGBE_FCOE
- if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
- ixgbe_configure_fcoe(adapter);
-
-#endif /* IXGBE_FCOE */
-
- if (adapter->hw.mac.type != ixgbe_mac_82598EB)
- hw->mac.ops.disable_sec_rx_path(hw);
-
- if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
- ixgbe_init_fdir_signature_82599(&adapter->hw,
- adapter->fdir_pballoc);
- } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
- ixgbe_init_fdir_perfect_82599(&adapter->hw,
- adapter->fdir_pballoc);
- ixgbe_fdir_filter_restore(adapter);
- }
-
- if (adapter->hw.mac.type != ixgbe_mac_82598EB)
- hw->mac.ops.enable_sec_rx_path(hw);
-
- ixgbe_configure_virtualization(adapter);
-
- ixgbe_configure_tx(adapter);
- ixgbe_configure_rx(adapter);
-}
-#endif
-
-static bool ixgbe_is_sfp(struct ixgbe_hw *hw)
-{
- switch (hw->phy.type) {
- case ixgbe_phy_sfp_avago:
- case ixgbe_phy_sfp_ftl:
- case ixgbe_phy_sfp_intel:
- case ixgbe_phy_sfp_unknown:
- case ixgbe_phy_sfp_passive_tyco:
- case ixgbe_phy_sfp_passive_unknown:
- case ixgbe_phy_sfp_active_unknown:
- case ixgbe_phy_sfp_ftl_active:
- return true;
- case ixgbe_phy_nl:
- if (hw->mac.type == ixgbe_mac_82598EB)
- return true;
- default:
- return false;
- }
-}
-
-
-/**
- * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset
- * @adapter: board private structure
- *
- * On a reset we need to clear out the VF stats or accounting gets
- * messed up because they're not clear on read.
- **/
-void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- int i;
-
- for (i = 0; i < adapter->num_vfs; i++) {
- adapter->vfinfo[i].last_vfstats.gprc =
- IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
- adapter->vfinfo[i].saved_rst_vfstats.gprc +=
- adapter->vfinfo[i].vfstats.gprc;
- adapter->vfinfo[i].vfstats.gprc = 0;
- adapter->vfinfo[i].last_vfstats.gptc =
- IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
- adapter->vfinfo[i].saved_rst_vfstats.gptc +=
- adapter->vfinfo[i].vfstats.gptc;
- adapter->vfinfo[i].vfstats.gptc = 0;
- adapter->vfinfo[i].last_vfstats.gorc =
- IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
- adapter->vfinfo[i].saved_rst_vfstats.gorc +=
- adapter->vfinfo[i].vfstats.gorc;
- adapter->vfinfo[i].vfstats.gorc = 0;
- adapter->vfinfo[i].last_vfstats.gotc =
- IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
- adapter->vfinfo[i].saved_rst_vfstats.gotc +=
- adapter->vfinfo[i].vfstats.gotc;
- adapter->vfinfo[i].vfstats.gotc = 0;
- adapter->vfinfo[i].last_vfstats.mprc =
- IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
- adapter->vfinfo[i].saved_rst_vfstats.mprc +=
- adapter->vfinfo[i].vfstats.mprc;
- adapter->vfinfo[i].vfstats.mprc = 0;
- }
-}
-
-
-
-void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
-{
-#ifdef NO_VNIC
- WARN_ON(in_interrupt());
- /* put off any impending NetWatchDogTimeout */
- adapter->netdev->trans_start = jiffies;
-
- while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
- ixgbe_down(adapter);
- /*
- * If SR-IOV enabled then wait a bit before bringing the adapter
- * back up to give the VFs time to respond to the reset. The
- * two second wait is based upon the watchdog timer cycle in
- * the VF driver.
- */
- if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
- msleep(2000);
- ixgbe_up(adapter);
- clear_bit(__IXGBE_RESETTING, &adapter->state);
-#endif
-}
-
-void ixgbe_up(struct ixgbe_adapter *adapter)
-{
- /* hardware has been reset, we need to reload some things */
- //ixgbe_configure(adapter);
-
- //ixgbe_up_complete(adapter);
-}
-
-void ixgbe_reset(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- int err;
-
- /* lock SFP init bit to prevent race conditions with the watchdog */
- while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
- usleep_range(1000, 2000);
-
- /* clear all SFP and link config related flags while holding SFP_INIT */
- adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
- IXGBE_FLAG2_SFP_NEEDS_RESET);
- adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
-
- err = hw->mac.ops.init_hw(hw);
- switch (err) {
- case 0:
- case IXGBE_ERR_SFP_NOT_PRESENT:
- case IXGBE_ERR_SFP_NOT_SUPPORTED:
- break;
- case IXGBE_ERR_MASTER_REQUESTS_PENDING:
- e_dev_err("master disable timed out\n");
- break;
- case IXGBE_ERR_EEPROM_VERSION:
- /* We are running on a pre-production device, log a warning */
- e_dev_warn("This device is a pre-production adapter/LOM. "
- "Please be aware there may be issues associated "
- "with your hardware. If you are experiencing "
- "problems please contact your Intel or hardware "
- "representative who provided you with this "
- "hardware.\n");
- break;
- default:
- e_dev_err("Hardware Error: %d\n", err);
- }
-
- clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
-
- ixgbe_flush_sw_mac_table(adapter);
- memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
- netdev->addr_len);
- adapter->mac_table[0].queue = adapter->num_vfs;
- adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
- IXGBE_MAC_STATE_IN_USE);
- hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
- adapter->mac_table[0].queue,
- IXGBE_RAH_AV);
-}
-
-
-
-
-
-
-void ixgbe_down(struct ixgbe_adapter *adapter)
-{
-#ifdef NO_VNIC
- struct net_device *netdev = adapter->netdev;
- struct ixgbe_hw *hw = &adapter->hw;
- u32 rxctrl;
- int i;
-
- /* signal that we are down to the interrupt handler */
- set_bit(__IXGBE_DOWN, &adapter->state);
-
- /* disable receives */
- rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
- IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
-
- /* disable all enabled rx queues */
- for (i = 0; i < adapter->num_rx_queues; i++)
- /* this call also flushes the previous write */
- ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
-
- usleep_range(10000, 20000);
-
- netif_tx_stop_all_queues(netdev);
-
- /* call carrier off first to avoid false dev_watchdog timeouts */
- netif_carrier_off(netdev);
- netif_tx_disable(netdev);
-
- ixgbe_irq_disable(adapter);
-
- ixgbe_napi_disable_all(adapter);
-
- adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
- IXGBE_FLAG2_RESET_REQUESTED);
- adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
-
- del_timer_sync(&adapter->service_timer);
-
- if (adapter->num_vfs) {
- /* Clear EITR Select mapping */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
-
- /* Mark all the VFs as inactive */
- for (i = 0 ; i < adapter->num_vfs; i++)
- adapter->vfinfo[i].clear_to_send = 0;
-
- /* ping all the active vfs to let them know we are going down */
- ixgbe_ping_all_vfs(adapter);
-
- /* Disable all VFTE/VFRE TX/RX */
- ixgbe_disable_tx_rx(adapter);
- }
-
- /* disable transmits in the hardware now that interrupts are off */
- for (i = 0; i < adapter->num_tx_queues; i++) {
- u8 reg_idx = adapter->tx_ring[i]->reg_idx;
- IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
- }
-
- /* Disable the Tx DMA engine on 82599 and X540 */
- switch (hw->mac.type) {
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
- (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
- ~IXGBE_DMATXCTL_TE));
- break;
- default:
- break;
- }
-
-#ifdef HAVE_PCI_ERS
- if (!pci_channel_offline(adapter->pdev))
-#endif
- ixgbe_reset(adapter);
- /* power down the optics */
- if ((hw->phy.multispeed_fiber) ||
- ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
- (hw->mac.type == ixgbe_mac_82599EB)))
- ixgbe_disable_tx_laser(hw);
-
- ixgbe_clean_all_tx_rings(adapter);
- ixgbe_clean_all_rx_rings(adapter);
-
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
- /* since we reset the hardware DCA settings were cleared */
- ixgbe_setup_dca(adapter);
-#endif
-
-#endif /* NO_VNIC */
-}
-
-#ifndef NO_VNIC
-
-#undef IXGBE_FCOE
-
-/* Artificial max queue cap per traffic class in DCB mode */
-#define DCB_QUEUE_CAP 8
-
-/**
- * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device
- * @adapter: board private structure to initialize
- *
- * When DCB (Data Center Bridging) is enabled, allocate queues for
- * each traffic class. If multiqueue isn't available, then abort DCB
- * initialization.
- *
- * This function handles all combinations of DCB, RSS, and FCoE.
- *
- **/
-static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
-{
- int tcs;
-#ifdef HAVE_MQPRIO
- int rss_i, i, offset = 0;
- struct net_device *dev = adapter->netdev;
-
- /* Map queue offset and counts onto allocated tx queues */
- tcs = netdev_get_num_tc(dev);
-
- if (!tcs)
- return false;
-
- rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus());
-
- if (rss_i > DCB_QUEUE_CAP)
- rss_i = DCB_QUEUE_CAP;
-
- for (i = 0; i < tcs; i++) {
- netdev_set_tc_queue(dev, i, rss_i, offset);
- offset += rss_i;
- }
-
- adapter->num_tx_queues = rss_i * tcs;
- adapter->num_rx_queues = rss_i * tcs;
-
-#ifdef IXGBE_FCOE
- /* FCoE enabled queues require special configuration indexed
- * by feature specific indices and mask. Here we map FCoE
- * indices onto the DCB queue pairs allowing FCoE to own
- * configuration later.
- */
-
- if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
- struct ixgbe_ring_feature *f;
- int tc;
- u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0};
-
- ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg,
- IXGBE_DCB_TX_CONFIG,
- prio_tc);
- tc = prio_tc[adapter->fcoe.up];
-
- f = &adapter->ring_feature[RING_F_FCOE];
- f->indices = min_t(int, rss_i, f->indices);
- f->mask = rss_i * tc;
- }
-#endif /* IXGBE_FCOE */
-#else
- if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
- return false;
-
- /* Enable one Queue per traffic class */
- tcs = adapter->tc;
- if (!tcs)
- return false;
-
-#ifdef IXGBE_FCOE
- if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
- struct ixgbe_ring_feature *f;
- int tc = netdev_get_prio_tc_map(adapter->netdev,
- adapter->fcoe.up);
-
- f = &adapter->ring_feature[RING_F_FCOE];
-
- /*
-		 * We have a max of 8 queues for FCoE, where 8 is the
- * FCoE redirection table size. We must also share
- * ring resources with network traffic so if FCoE TC is
- * 4 or greater and we are in 8 TC mode we can only use
- * 7 queues.
- */
- if ((tcs > 4) && (tc >= 4) && (f->indices > 7))
- f->indices = 7;
-
- f->indices = min_t(int, num_online_cpus(), f->indices);
- f->mask = tcs;
-
- adapter->num_rx_queues = f->indices + tcs;
- adapter->num_tx_queues = f->indices + tcs;
-
- return true;
- }
-
-#endif /* IXGBE_FCOE */
- adapter->num_rx_queues = tcs;
- adapter->num_tx_queues = tcs;
-#endif /* HAVE_MQ */
-
- return true;
-}
-
-/**
- * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices
- * @adapter: board private structure to initialize
- *
- * When VMDq (Virtual Machine Device Queues) is enabled, allocate queues
- * and VM pools where appropriate. If RSS is available, then also try and
- * enable RSS and map accordingly.
- *
- **/
-static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter)
-{
- int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices;
- int vmdq_m = 0;
- int rss_i = adapter->ring_feature[RING_F_RSS].indices;
- unsigned long i;
- int rss_shift;
- bool ret = false;
-
-
- switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED
- | IXGBE_FLAG_DCB_ENABLED
- | IXGBE_FLAG_VMDQ_ENABLED)) {
-
- case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED):
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i);
- if (vmdq_i > 32)
- rss_i = 2;
- else
- rss_i = 4;
- i = rss_i;
- rss_shift = find_first_bit(&i, sizeof(i) * 8);
- vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) <<
- rss_shift) & (MAX_RX_QUEUES - 1);
- break;
- default:
- break;
- }
- adapter->num_rx_queues = vmdq_i * rss_i;
- adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i);
- ret = true;
- break;
-
- case (IXGBE_FLAG_VMDQ_ENABLED):
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82598EB:
- vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1);
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1;
- break;
- default:
- break;
- }
- adapter->num_rx_queues = vmdq_i;
- adapter->num_tx_queues = vmdq_i;
- ret = true;
- break;
-
- default:
- ret = false;
- goto vmdq_queues_out;
- }
-
- if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
- adapter->num_rx_pools = vmdq_i;
- adapter->num_rx_queues_per_pool = adapter->num_rx_queues /
- vmdq_i;
- } else {
- adapter->num_rx_pools = adapter->num_rx_queues;
- adapter->num_rx_queues_per_pool = 1;
- }
- /* save the mask for later use */
- adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
-vmdq_queues_out:
- return ret;
-}
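One detail worth calling out in the combined RSS + VMDq branch above (editorial note): because rss_i is forced to a power of two (2 or 4), find_first_bit() on it is effectively a log2, which is then used to place the pool index above the per-pool RSS queue bits.

/* illustrative only: rss_i = 4 (binary 100) -> rss_shift = 2, so the VMDq
 * pool number occupies the queue-index bits above the two RSS bits.
 */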
-
-/**
- * ixgbe_set_rss_queues: Allocate queues for RSS
- * @adapter: board private structure to initialize
- *
- * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
- * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
- *
- **/
-static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_ring_feature *f;
-
- if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) {
- adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
- return false;
- }
-
- /* set mask for 16 queue limit of RSS */
- f = &adapter->ring_feature[RING_F_RSS];
- f->mask = 0xF;
-
- /*
- * Use Flow Director in addition to RSS to ensure the best
- * distribution of flows across cores, even when an FDIR flow
- * isn't matched.
- */
- if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
- f = &adapter->ring_feature[RING_F_FDIR];
-
- f->indices = min_t(int, num_online_cpus(), f->indices);
- f->mask = 0;
- }
-
- adapter->num_rx_queues = f->indices;
-#ifdef HAVE_TX_MQ
- adapter->num_tx_queues = f->indices;
-#endif
-
- return true;
-}
-
-#ifdef IXGBE_FCOE
-/**
- * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE)
- * @adapter: board private structure to initialize
- *
- * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges.
- * The ring feature mask is not used as a mask for FCoE, as it can take any 8
- * rx queues out of the max number of rx queues; instead, it is used as the
- * index of the first rx queue used by FCoE.
- *
- **/
-static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_ring_feature *f;
-
- if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
- return false;
-
- ixgbe_set_rss_queues(adapter);
-
- f = &adapter->ring_feature[RING_F_FCOE];
- f->indices = min_t(int, num_online_cpus(), f->indices);
-
- /* adding FCoE queues */
- f->mask = adapter->num_rx_queues;
- adapter->num_rx_queues += f->indices;
- adapter->num_tx_queues += f->indices;
-
- return true;
-}
-
-#endif /* IXGBE_FCOE */
-/*
- * ixgbe_set_num_queues: Allocate queues for device, feature dependent
- * @adapter: board private structure to initialize
- *
- * This is the top-level queue allocation routine. The order here is very
- * important, starting with the largest number of features turned on at once,
- * and ending with the smallest set of features. This way large combinations
- * can be allocated if they're turned on, and smaller combinations are the
- * fallthrough conditions.
- *
- **/
-static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
-{
- /* Start with base case */
- adapter->num_rx_queues = 1;
- adapter->num_tx_queues = 1;
- adapter->num_rx_pools = adapter->num_rx_queues;
- adapter->num_rx_queues_per_pool = 1;
-
- if (ixgbe_set_vmdq_queues(adapter))
- return;
-
- if (ixgbe_set_dcb_queues(adapter))
- return;
-
-#ifdef IXGBE_FCOE
- if (ixgbe_set_fcoe_queues(adapter))
- return;
-
-#endif /* IXGBE_FCOE */
- ixgbe_set_rss_queues(adapter);
-}
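/*
 * The routine above tries each feature in a fixed priority order (VMDq, DCB,
 * FCoE, RSS) and stops at the first one that claims the queues. A hedged,
 * self-contained sketch of that "first match wins" pattern; the try_* helpers
 * are hypothetical stand-ins, not the driver's real functions.
 */
#include <stdbool.h>
#include <stdio.h>

static bool try_vmdq(int *q) { (void)q; return false; }
static bool try_dcb(int *q)  { (void)q; return false; }
static bool try_fcoe(int *q) { (void)q; return false; }
static bool try_rss(int *q)  { *q = 8; return true; }

int main(void)
{
	bool (*setters[])(int *) = { try_vmdq, try_dcb, try_fcoe, try_rss };
	int queues = 1;		/* base case: a single rx/tx queue */
	unsigned int i;

	for (i = 0; i < sizeof(setters) / sizeof(setters[0]); i++)
		if (setters[i](&queues))
			break;	/* first feature that claims the queues wins */

	printf("queues allocated: %d\n", queues);
	return 0;
}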
-
-#endif
-
-
-/**
- * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter)
- * @adapter: board private structure to initialize
- *
- * ixgbe_sw_init initializes the Adapter private data structure.
- * Fields are initialized based on PCI device information and
- * OS network device settings (MTU size).
- **/
-static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct pci_dev *pdev = adapter->pdev;
- int err;
-
- /* PCI config space info */
-
- hw->vendor_id = pdev->vendor;
- hw->device_id = pdev->device;
- pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
- hw->subsystem_vendor_id = pdev->subsystem_vendor;
- hw->subsystem_device_id = pdev->subsystem_device;
-
- err = ixgbe_init_shared_code(hw);
- if (err) {
- e_err(probe, "init_shared_code failed: %d\n", err);
- goto out;
- }
-	adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
-				     hw->mac.num_rar_entries,
-				     GFP_ATOMIC);
-	if (!adapter->mac_table) {
-		err = -ENOMEM;
-		goto out;
-	}
- /* Set capability flags */
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
- IXGBE_FLAG_MSIX_CAPABLE |
- IXGBE_FLAG_MQ_CAPABLE |
- IXGBE_FLAG_RSS_CAPABLE;
- adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
- adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
-#endif
- adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE;
- adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE;
-
- if (hw->device_id == IXGBE_DEV_ID_82598AT)
- adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
-
- adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598;
- break;
-	case ixgbe_mac_X540:
-		adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
-		/* fall through - X540 shares the 82599EB capability flags */
-	case ixgbe_mac_82599EB:
- adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
- IXGBE_FLAG_MSIX_CAPABLE |
- IXGBE_FLAG_MQ_CAPABLE |
- IXGBE_FLAG_RSS_CAPABLE;
- adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
-#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
- adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
-#endif
- adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE;
- adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
-#ifdef IXGBE_FCOE
- adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
- adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
- adapter->ring_feature[RING_F_FCOE].indices = 0;
-#ifdef CONFIG_DCB
- /* Default traffic class to use for FCoE */
- adapter->fcoe.tc = IXGBE_FCOE_DEFTC;
- adapter->fcoe.up = IXGBE_FCOE_DEFTC;
- adapter->fcoe.up_set = IXGBE_FCOE_DEFTC;
-#endif
-#endif
- if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
- adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
-#ifndef IXGBE_NO_SMART_SPEED
- hw->phy.smart_speed = ixgbe_smart_speed_on;
-#else
- hw->phy.smart_speed = ixgbe_smart_speed_off;
-#endif
- adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599;
- default:
- break;
- }
-
- /* n-tuple support exists, always init our spinlock */
- //spin_lock_init(&adapter->fdir_perfect_lock);
-
- if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) {
- int j;
- struct ixgbe_dcb_tc_config *tc;
- int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS;
-
-
- adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i;
- adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i;
- for (j = 0; j < dcb_i; j++) {
- tc = &adapter->dcb_cfg.tc_config[j];
- tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0;
- tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i;
- tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0;
- tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i;
- tc->pfc = ixgbe_dcb_pfc_disabled;
- if (j == 0) {
-				/* total bandwidth of all TCs needs to be 100 */
- tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent +=
- 100 % dcb_i;
- tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent +=
- 100 % dcb_i;
- }
- }
-
- /* Initialize default user to priority mapping, UPx->TC0 */
- tc = &adapter->dcb_cfg.tc_config[0];
- tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF;
- tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF;
-
- adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] = 100;
- adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100;
- adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal;
- adapter->dcb_cfg.pfc_mode_enable = false;
- adapter->dcb_cfg.round_robin_enable = false;
- adapter->dcb_set_bitmap = 0x00;
-#ifdef CONFIG_DCB
- adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE;
-#endif /* CONFIG_DCB */
-
- if (hw->mac.type == ixgbe_mac_X540) {
- adapter->dcb_cfg.num_tcs.pg_tcs = 4;
- adapter->dcb_cfg.num_tcs.pfc_tcs = 4;
- }
- }
-#ifdef CONFIG_DCB
- /* XXX does this need to be initialized even w/o DCB? */
- //memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
- // sizeof(adapter->temp_dcb_cfg));
-
-#endif
- //if (hw->mac.type == ixgbe_mac_82599EB ||
- // hw->mac.type == ixgbe_mac_X540)
- // hw->mbx.ops.init_params(hw);
-
- /* default flow control settings */
- hw->fc.requested_mode = ixgbe_fc_full;
- hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */
-
- adapter->last_lfc_mode = hw->fc.current_mode;
- ixgbe_pbthresh_setup(adapter);
- hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
- hw->fc.send_xon = true;
- hw->fc.disable_fc_autoneg = false;
-
- /* set default ring sizes */
- adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
- adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
-
- /* set default work limits */
- adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
- adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK;
-
- set_bit(__IXGBE_DOWN, &adapter->state);
-out:
- return err;
-}
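/*
 * The DCB defaults above split the link bandwidth evenly across the traffic
 * classes and let TC0 absorb the integer-division remainder so the total is
 * exactly 100. A standalone check of that arithmetic, assuming the usual
 * eight traffic classes (IXGBE_DCB_MAX_TRAFFIC_CLASS).
 */
#include <stdio.h>

int main(void)
{
	int dcb_i = 8;				/* assumed max traffic classes */
	int per_tc = 100 / dcb_i;		/* 12% for each traffic class  */
	int tc0 = per_tc + 100 % dcb_i;		/* TC0 also gets the remainder */
	int total = tc0 + (dcb_i - 1) * per_tc;

	printf("per TC: %d%%, TC0: %d%%, total: %d%%\n", per_tc, tc0, total);
	return 0;
}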
-
-/**
- * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors)
- * @tx_ring: tx descriptor ring (for a specific queue) to setup
- *
- * Return 0 on success, negative on failure
- **/
-int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
-{
- struct device *dev = tx_ring->dev;
- //int orig_node = dev_to_node(dev);
- int numa_node = -1;
- int size;
-
- size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
-
- if (tx_ring->q_vector)
- numa_node = tx_ring->q_vector->numa_node;
-
- tx_ring->tx_buffer_info = vzalloc_node(size, numa_node);
- if (!tx_ring->tx_buffer_info)
- tx_ring->tx_buffer_info = vzalloc(size);
- if (!tx_ring->tx_buffer_info)
- goto err;
-
- /* round up to nearest 4K */
- tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
- tx_ring->size = ALIGN(tx_ring->size, 4096);
-
- //set_dev_node(dev, numa_node);
- //tx_ring->desc = dma_alloc_coherent(dev,
- // tx_ring->size,
- // &tx_ring->dma,
- // GFP_KERNEL);
- //set_dev_node(dev, orig_node);
- //if (!tx_ring->desc)
- // tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
- // &tx_ring->dma, GFP_KERNEL);
- //if (!tx_ring->desc)
- // goto err;
-
- return 0;
-
-err:
- vfree(tx_ring->tx_buffer_info);
- tx_ring->tx_buffer_info = NULL;
- dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
- return -ENOMEM;
-}
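/*
 * The descriptor ring size above is rounded up to a 4 KiB multiple with the
 * kernel's ALIGN() macro. A standalone sketch of that round-up, using an
 * illustrative (non-default) ring size; advanced tx descriptors are 16 bytes.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long count = 600;	/* illustrative descriptor count   */
	unsigned long desc = 16;	/* sizeof(union ixgbe_adv_tx_desc) */
	unsigned long size = count * desc;

	printf("%lu descriptors -> %lu bytes -> %lu bytes after 4K alignment\n",
	       count, size, ALIGN_UP(size, 4096UL));
	return 0;
}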
-
-/**
- * ixgbe_setup_all_tx_resources - allocate all queues Tx resources
- * @adapter: board private structure
- *
- * If this function returns with an error, then it's possible one or
- * more of the rings is populated (while the rest are not). It is the
- * caller's duty to clean those orphaned rings.
- *
- * Return 0 on success, negative on failure
- **/
-static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
-{
- int i, err = 0;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
- if (!err)
- continue;
- e_err(probe, "Allocation for Tx Queue %u failed\n", i);
- break;
- }
-
- return err;
-}
-
-/**
- * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors)
- * @rx_ring: rx descriptor ring (for a specific queue) to setup
- *
- * Returns 0 on success, negative on failure
- **/
-int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
-{
- struct device *dev = rx_ring->dev;
- //int orig_node = dev_to_node(dev);
- int numa_node = -1;
- int size;
-
- size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
-
- if (rx_ring->q_vector)
- numa_node = rx_ring->q_vector->numa_node;
-
- rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
- if (!rx_ring->rx_buffer_info)
- rx_ring->rx_buffer_info = vzalloc(size);
- if (!rx_ring->rx_buffer_info)
- goto err;
-
- /* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
- rx_ring->size = ALIGN(rx_ring->size, 4096);
-
-#ifdef NO_VNIC
- set_dev_node(dev, numa_node);
- rx_ring->desc = dma_alloc_coherent(dev,
- rx_ring->size,
- &rx_ring->dma,
- GFP_KERNEL);
- set_dev_node(dev, orig_node);
- if (!rx_ring->desc)
- rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
- &rx_ring->dma, GFP_KERNEL);
- if (!rx_ring->desc)
- goto err;
-
-#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
- ixgbe_init_rx_page_offset(rx_ring);
-
-#endif
-
-#endif /* NO_VNIC */
- return 0;
-err:
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
- dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
- return -ENOMEM;
-}
-
-/**
- * ixgbe_setup_all_rx_resources - allocate all queues Rx resources
- * @adapter: board private structure
- *
- * If this function returns with an error, then it's possible one or
- * more of the rings is populated (while the rest are not). It is the
- * caller's duty to clean those orphaned rings.
- *
- * Return 0 on success, negative on failure
- **/
-static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
-{
- int i, err = 0;
-
- for (i = 0; i < adapter->num_rx_queues; i++) {
- err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
- if (!err)
- continue;
- e_err(probe, "Allocation for Rx Queue %u failed\n", i);
- break;
- }
-
- return err;
-}
-
-/**
- * ixgbe_free_tx_resources - Free Tx Resources per Queue
- * @tx_ring: Tx descriptor ring for a specific queue
- *
- * Free all transmit software resources
- **/
-void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
-{
- //ixgbe_clean_tx_ring(tx_ring);
-
- vfree(tx_ring->tx_buffer_info);
- tx_ring->tx_buffer_info = NULL;
-
- /* if not set, then don't free */
- if (!tx_ring->desc)
- return;
-
- //dma_free_coherent(tx_ring->dev, tx_ring->size,
- // tx_ring->desc, tx_ring->dma);
-
- tx_ring->desc = NULL;
-}
-
-/**
- * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all transmit software resources
- **/
-static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- if (adapter->tx_ring[i]->desc)
- ixgbe_free_tx_resources(adapter->tx_ring[i]);
-}
-
-/**
- * ixgbe_free_rx_resources - Free Rx Resources
- * @rx_ring: ring to clean the resources from
- *
- * Free all receive software resources
- **/
-void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
-{
- //ixgbe_clean_rx_ring(rx_ring);
-
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
-
- /* if not set, then don't free */
- if (!rx_ring->desc)
- return;
-
- //dma_free_coherent(rx_ring->dev, rx_ring->size,
- // rx_ring->desc, rx_ring->dma);
-
- rx_ring->desc = NULL;
-}
-
-/**
- * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all receive software resources
- **/
-static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_rx_queues; i++)
- if (adapter->rx_ring[i]->desc)
- ixgbe_free_rx_resources(adapter->rx_ring[i]);
-}
-
-
-/**
- * ixgbe_open - Called when a network interface is made active
- * @netdev: network interface device structure
- *
- * Returns 0 on success, negative value on failure
- *
- * The open entry point is called when a network interface is made
- * active by the system (IFF_UP). At this point all resources needed
- * for transmit and receive operations are allocated, the interrupt
- * handler is registered with the OS, the watchdog timer is started,
- * and the stack is notified that the interface is ready.
- **/
-//static
-int ixgbe_open(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- int err;
-
- /* disallow open during test */
- if (test_bit(__IXGBE_TESTING, &adapter->state))
- return -EBUSY;
-
- netif_carrier_off(netdev);
-
- /* allocate transmit descriptors */
- err = ixgbe_setup_all_tx_resources(adapter);
- if (err)
- goto err_setup_tx;
-
- /* allocate receive descriptors */
- err = ixgbe_setup_all_rx_resources(adapter);
- if (err)
- goto err_setup_rx;
-
-#ifdef NO_VNIC
- ixgbe_configure(adapter);
-
- err = ixgbe_request_irq(adapter);
- if (err)
- goto err_req_irq;
-
- ixgbe_up_complete(adapter);
-
-err_req_irq:
-#else
- return 0;
-#endif
-err_setup_rx:
- ixgbe_free_all_rx_resources(adapter);
-err_setup_tx:
- ixgbe_free_all_tx_resources(adapter);
- ixgbe_reset(adapter);
-
- return err;
-}
-
-/**
- * ixgbe_close - Disables a network interface
- * @netdev: network interface device structure
- *
- * Returns 0, this is not allowed to fail
- *
- * The close entry point is called when an interface is de-activated
- * by the OS. The hardware is still under the driver's control, but
- * needs to be disabled. A global MAC reset is issued to stop the
- * hardware, and all transmit and receive resources are freed.
- **/
-//static
-int ixgbe_close(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- //ixgbe_down(adapter);
- //ixgbe_free_irq(adapter);
-
- //ixgbe_fdir_filter_exit(adapter);
-
- //ixgbe_free_all_tx_resources(adapter);
- //ixgbe_free_all_rx_resources(adapter);
-
- ixgbe_release_hw_control(adapter);
-
- return 0;
-}
-
-
-
-
-
-/**
- * ixgbe_get_stats - Get System Network Statistics
- * @netdev: network interface device structure
- *
- * Returns the address of the device statistics structure.
- * The statistics are actually updated from the timer callback.
- **/
-//static
-struct net_device_stats *ixgbe_get_stats(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- /* update the stats data */
- ixgbe_update_stats(adapter);
-
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- /* only return the current stats */
- return &netdev->stats;
-#else
- /* only return the current stats */
- return &adapter->net_stats;
-#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
-}
-
-/**
- * ixgbe_update_stats - Update the board statistics counters.
- * @adapter: board private structure
- **/
-void ixgbe_update_stats(struct ixgbe_adapter *adapter)
-{
-#ifdef HAVE_NETDEV_STATS_IN_NETDEV
- struct net_device_stats *net_stats = &adapter->netdev->stats;
-#else
- struct net_device_stats *net_stats = &adapter->net_stats;
-#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
- struct ixgbe_hw *hw = &adapter->hw;
- struct ixgbe_hw_stats *hwstats = &adapter->stats;
- u64 total_mpc = 0;
- u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
- u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
- u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
- u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
-#ifndef IXGBE_NO_LRO
- u32 flushed = 0, coal = 0;
- int num_q_vectors = 1;
-#endif
-#ifdef IXGBE_FCOE
- struct ixgbe_fcoe *fcoe = &adapter->fcoe;
- unsigned int cpu;
- u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0;
-#endif /* IXGBE_FCOE */
-
- printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n",
- adapter->num_tx_queues, adapter->num_rx_queues);
-
- if (test_bit(__IXGBE_DOWN, &adapter->state) ||
- test_bit(__IXGBE_RESETTING, &adapter->state))
- return;
-
-#ifndef IXGBE_NO_LRO
- if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
- num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-#endif
- if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
- u64 rsc_count = 0;
- u64 rsc_flush = 0;
- for (i = 0; i < adapter->num_rx_queues; i++) {
- rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count;
- rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush;
- }
- adapter->rsc_total_count = rsc_count;
- adapter->rsc_total_flush = rsc_flush;
- }
-
-#ifndef IXGBE_NO_LRO
- for (i = 0; i < num_q_vectors; i++) {
- struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
- if (!q_vector)
- continue;
- flushed += q_vector->lrolist.stats.flushed;
- coal += q_vector->lrolist.stats.coal;
- }
- adapter->lro_stats.flushed = flushed;
- adapter->lro_stats.coal = coal;
-
-#endif
- for (i = 0; i < adapter->num_rx_queues; i++) {
- struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
- non_eop_descs += rx_ring->rx_stats.non_eop_descs;
- alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
- alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
- hw_csum_rx_error += rx_ring->rx_stats.csum_err;
- bytes += rx_ring->stats.bytes;
- packets += rx_ring->stats.packets;
-
- }
- adapter->non_eop_descs = non_eop_descs;
- adapter->alloc_rx_page_failed = alloc_rx_page_failed;
- adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
- adapter->hw_csum_rx_error = hw_csum_rx_error;
- net_stats->rx_bytes = bytes;
- net_stats->rx_packets = packets;
-
- bytes = 0;
- packets = 0;
- /* gather some stats to the adapter struct that are per queue */
- for (i = 0; i < adapter->num_tx_queues; i++) {
- struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
- restart_queue += tx_ring->tx_stats.restart_queue;
- tx_busy += tx_ring->tx_stats.tx_busy;
- bytes += tx_ring->stats.bytes;
- packets += tx_ring->stats.packets;
- }
- adapter->restart_queue = restart_queue;
- adapter->tx_busy = tx_busy;
- net_stats->tx_bytes = bytes;
- net_stats->tx_packets = packets;
-
- hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
-
- /* 8 register reads */
- for (i = 0; i < 8; i++) {
- /* for packet buffers not used, the register should read 0 */
- mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i));
- missed_rx += mpc;
- hwstats->mpc[i] += mpc;
- total_mpc += hwstats->mpc[i];
- hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
- hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
- hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
- hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
- hwstats->pxonrxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
- break;
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- hwstats->pxonrxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
- break;
- default:
- break;
- }
- }
-
-	/* 16 register reads */
- for (i = 0; i < 16; i++) {
- hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
- hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
- if ((hw->mac.type == ixgbe_mac_82599EB) ||
- (hw->mac.type == ixgbe_mac_X540)) {
- hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
- IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
- hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
- IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */
- }
- }
-
- hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
- /* work around hardware counting issue */
- hwstats->gprc -= missed_rx;
-
- ixgbe_update_xoff_received(adapter);
-
- /* 82598 hardware only has a 32 bit counter in the high register */
- switch (hw->mac.type) {
- case ixgbe_mac_82598EB:
- hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
- hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
- hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
- hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
- break;
- case ixgbe_mac_X540:
-		/* OS2BMC stats are X540 only */
-		hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
-		hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
-		hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
-		hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
-		/* fall through - the remaining counters are shared with 82599EB */
-	case ixgbe_mac_82599EB:
- for (i = 0; i < 16; i++)
- adapter->hw_rx_no_dma_resources +=
- IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
- hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
- IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
- hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
- IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */
- hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
- IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
- hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
-#ifdef HAVE_TX_MQ
- hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
- hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
-#endif /* HAVE_TX_MQ */
-#ifdef IXGBE_FCOE
- hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
- hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
- hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
- hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
- hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
- hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
- hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
- /* Add up per cpu counters for total ddp aloc fail */
- if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) {
- for_each_possible_cpu(cpu) {
- fcoe_noddp_counts_sum +=
- *per_cpu_ptr(fcoe->pcpu_noddp, cpu);
- fcoe_noddp_ext_buff_counts_sum +=
- *per_cpu_ptr(fcoe->
- pcpu_noddp_ext_buff, cpu);
- }
- }
- hwstats->fcoe_noddp = fcoe_noddp_counts_sum;
- hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum;
-
-#endif /* IXGBE_FCOE */
- break;
- default:
- break;
- }
- bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
- hwstats->bprc += bprc;
- hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
- if (hw->mac.type == ixgbe_mac_82598EB)
- hwstats->mprc -= bprc;
- hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
- hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
- hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
- hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
- hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
- hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
- hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
- hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
- lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
- hwstats->lxontxc += lxon;
- lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
- hwstats->lxofftxc += lxoff;
- hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
- hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
- /*
- * 82598 errata - tx of flow control packets is included in tx counters
- */
- xon_off_tot = lxon + lxoff;
- hwstats->gptc -= xon_off_tot;
- hwstats->mptc -= xon_off_tot;
- hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN));
- hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
- hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
- hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
- hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
- hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
- hwstats->ptc64 -= xon_off_tot;
- hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
- hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
- hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
- hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
- hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
- hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
- /* Fill out the OS statistics structure */
- net_stats->multicast = hwstats->mprc;
-
- /* Rx Errors */
- net_stats->rx_errors = hwstats->crcerrs +
- hwstats->rlec;
- net_stats->rx_dropped = 0;
- net_stats->rx_length_errors = hwstats->rlec;
- net_stats->rx_crc_errors = hwstats->crcerrs;
- net_stats->rx_missed_errors = total_mpc;
-
- /*
- * VF Stats Collection - skip while resetting because these
- * are not clear on read and otherwise you'll sometimes get
- * crazy values.
- */
- if (!test_bit(__IXGBE_RESETTING, &adapter->state)) {
- for (i = 0; i < adapter->num_vfs; i++) {
- UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), \
- adapter->vfinfo[i].last_vfstats.gprc, \
- adapter->vfinfo[i].vfstats.gprc);
- UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), \
- adapter->vfinfo[i].last_vfstats.gptc, \
- adapter->vfinfo[i].vfstats.gptc);
- UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), \
- IXGBE_PVFGORC_MSB(i), \
- adapter->vfinfo[i].last_vfstats.gorc, \
- adapter->vfinfo[i].vfstats.gorc);
- UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), \
- IXGBE_PVFGOTC_MSB(i), \
- adapter->vfinfo[i].last_vfstats.gotc, \
- adapter->vfinfo[i].vfstats.gotc);
- UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), \
- adapter->vfinfo[i].last_vfstats.mprc, \
- adapter->vfinfo[i].vfstats.mprc);
- }
- }
-}
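/*
 * The 82598 errata handling above removes transmitted flow control frames
 * from the good packet/byte counters; each XON/XOFF pause frame is counted
 * as a minimum-size frame of ETH_ZLEN + ETH_FCS_LEN = 64 bytes. A standalone
 * sketch of that adjustment with illustrative counter values.
 */
#include <stdio.h>

#define ETH_ZLEN	60	/* minimum frame length without the FCS */
#define ETH_FCS_LEN	4	/* frame check sequence length          */

int main(void)
{
	unsigned long long gptc = 100000ULL;	/* good packets transmitted */
	unsigned long long gotc = 9000000ULL;	/* good octets transmitted  */
	unsigned long lxon = 120, lxoff = 80;	/* pause frames sent        */
	unsigned long xon_off_tot = lxon + lxoff;

	gptc -= xon_off_tot;
	gotc -= (unsigned long long)xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN);

	printf("adjusted gptc=%llu gotc=%llu (pause frame = %d bytes)\n",
	       gptc, gotc, ETH_ZLEN + ETH_FCS_LEN);
	return 0;
}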
-
-
-#ifdef NO_VNIC
-
-/**
- * ixgbe_watchdog_update_link - update the link status
- * @adapter: pointer to the device adapter structure
- **/
-static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 link_speed = adapter->link_speed;
- bool link_up = adapter->link_up;
- bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
-
- if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE))
- return;
-
- if (hw->mac.ops.check_link) {
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
- } else {
- /* always assume link is up, if no check link function */
- link_speed = IXGBE_LINK_SPEED_10GB_FULL;
- link_up = true;
- }
-
-#ifdef HAVE_DCBNL_IEEE
- if (adapter->ixgbe_ieee_pfc)
- pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
-
-#endif
- if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) {
- hw->mac.ops.fc_enable(hw);
- //ixgbe_set_rx_drop_en(adapter);
- }
-
- if (link_up ||
- time_after(jiffies, (adapter->link_check_timeout +
- IXGBE_TRY_LINK_TIMEOUT))) {
- adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
- IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC);
- IXGBE_WRITE_FLUSH(hw);
- }
-
- adapter->link_up = link_up;
- adapter->link_speed = link_speed;
-}
-#endif
-
-
-
-#ifdef NO_VNIC
-
-/**
- * ixgbe_service_task - manages and runs subtasks
- * @work: pointer to work_struct containing our data
- **/
-static void ixgbe_service_task(struct work_struct *work)
-{
- //struct ixgbe_adapter *adapter = container_of(work,
- // struct ixgbe_adapter,
- // service_task);
-
- //ixgbe_reset_subtask(adapter);
- //ixgbe_sfp_detection_subtask(adapter);
- //ixgbe_sfp_link_config_subtask(adapter);
- //ixgbe_check_overtemp_subtask(adapter);
- //ixgbe_watchdog_subtask(adapter);
-#ifdef HAVE_TX_MQ
- //ixgbe_fdir_reinit_subtask(adapter);
-#endif
- //ixgbe_check_hang_subtask(adapter);
-
- //ixgbe_service_event_complete(adapter);
-}
-
-
-
-
-#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
- IXGBE_TXD_CMD_RS)
-
-
-/**
- * ixgbe_set_mac - Change the Ethernet Address of the NIC
- * @netdev: network interface device structure
- * @p: pointer to an address structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int ixgbe_set_mac(struct net_device *netdev, void *p)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- struct sockaddr *addr = p;
- int ret;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
-
- ixgbe_del_mac_filter(adapter, hw->mac.addr,
- adapter->num_vfs);
- memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
- memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
-
-
- /* set the correct pool for the new PF MAC address in entry 0 */
- ret = ixgbe_add_mac_filter(adapter, hw->mac.addr,
- adapter->num_vfs);
- return ret > 0 ? 0 : ret;
-}
-
-
-/**
- * ixgbe_ioctl - handle ioctl requests for the network device
- * @netdev: network interface device structure
- * @ifr: pointer to the interface request structure
- * @cmd: ioctl command
- **/
-static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
-{
- switch (cmd) {
-#ifdef ETHTOOL_OPS_COMPAT
- case SIOCETHTOOL:
- return ethtool_ioctl(ifr);
-#endif
- default:
- return -EOPNOTSUPP;
- }
-}
-#endif /* NO_VNIC */
-
-
-void ixgbe_do_reset(struct net_device *netdev)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
- if (netif_running(netdev))
- ixgbe_reinit_locked(adapter);
- else
- ixgbe_reset(adapter);
-}
-
-
-
-
-
-
-/**
- * ixgbe_kni_probe - Device Initialization Routine
- * @pdev: PCI device information struct
- * @lad_dev: output pointer that receives the newly allocated net_device
- *
- * Returns 0 on success, negative on failure
- *
- * ixgbe_kni_probe initializes an adapter identified by a pci_dev structure.
- * The OS initialization, configuring of the adapter private structure,
- * and a hardware reset occur.
- **/
-//static
-int ixgbe_kni_probe(struct pci_dev *pdev,
- struct net_device **lad_dev)
-{
- size_t count;
- struct net_device *netdev;
- struct ixgbe_adapter *adapter = NULL;
- struct ixgbe_hw *hw = NULL;
- static int cards_found;
- int i, err;
- u16 offset;
- u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl;
- u32 etrack_id;
- u16 build, major, patch;
- char *info_string, *i_s_var;
- u8 part_str[IXGBE_PBANUM_LENGTH];
- enum ixgbe_mac_type mac_type = ixgbe_mac_unknown;
-#ifdef HAVE_TX_MQ
- unsigned int indices = num_possible_cpus();
-#endif /* HAVE_TX_MQ */
-#ifdef IXGBE_FCOE
- u16 device_caps;
-#endif
- u16 wol_cap;
-
- err = pci_enable_device_mem(pdev);
- if (err)
- return err;
-
-
-#ifdef NO_VNIC
- err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
- IORESOURCE_MEM), ixgbe_driver_name);
- if (err) {
- dev_err(pci_dev_to_dev(pdev),
- "pci_request_selected_regions failed 0x%x\n", err);
- goto err_pci_reg;
- }
-#endif
-
- /*
-	 * The mac_type is needed before the adapter is set up, so rather
-	 * than maintain two devID -> MAC tables we dummy up an ixgbe_hw
-	 * struct and use ixgbe_set_mac_type.
- */
- hw = vmalloc(sizeof(struct ixgbe_hw));
- if (!hw) {
- pr_info("Unable to allocate memory for early mac "
- "check\n");
- } else {
- hw->vendor_id = pdev->vendor;
- hw->device_id = pdev->device;
- ixgbe_set_mac_type(hw);
- mac_type = hw->mac.type;
- vfree(hw);
- }
-
-#ifdef NO_VNIC
- /*
-	 * Workaround for silicon errata on 82598. Disable L0s in the PCI switch
-	 * port to which the 82598 is connected to prevent duplicate
-	 * completions caused by L0s. We need the mac type so that we only
-	 * do this on 82598 devices; ixgbe_set_mac_type does this for us if
-	 * we set its device ID.
- */
- if (mac_type == ixgbe_mac_82598EB)
- pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
-
- pci_enable_pcie_error_reporting(pdev);
-
- pci_set_master(pdev);
-#endif
-
-#ifdef HAVE_TX_MQ
-#ifdef CONFIG_DCB
-#ifdef HAVE_MQPRIO
- indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS;
-#else
- indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES);
-#endif /* HAVE_MQPRIO */
-#endif /* CONFIG_DCB */
-
- if (mac_type == ixgbe_mac_82598EB)
- indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES);
- else
- indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES);
-
-#ifdef IXGBE_FCOE
- indices += min_t(unsigned int, num_possible_cpus(),
- IXGBE_MAX_FCOE_INDICES);
-#endif
- netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
-#else /* HAVE_TX_MQ */
- netdev = alloc_etherdev(sizeof(struct ixgbe_adapter));
-#endif /* HAVE_TX_MQ */
- if (!netdev) {
- err = -ENOMEM;
- goto err_alloc_etherdev;
- }
-
- SET_NETDEV_DEV(netdev, &pdev->dev);
-
- adapter = netdev_priv(netdev);
- //pci_set_drvdata(pdev, adapter);
-
- adapter->netdev = netdev;
- adapter->pdev = pdev;
- hw = &adapter->hw;
- hw->back = adapter;
- adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
-
-#ifdef HAVE_PCI_ERS
- /*
- * call save state here in standalone driver because it relies on
- * adapter struct to exist, and needs to call netdev_priv
- */
- pci_save_state(pdev);
-
-#endif
- hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!hw->hw_addr) {
- err = -EIO;
- goto err_ioremap;
- }
- //ixgbe_assign_netdev_ops(netdev);
- ixgbe_set_ethtool_ops(netdev);
-
- strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
-
- adapter->bd_number = cards_found;
-
- /* setup the private structure */
- err = ixgbe_sw_init(adapter);
- if (err)
- goto err_sw_init;
-
-	/* Make it possible for the adapter to be woken up via WOL */
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
- break;
- default:
- break;
- }
-
- /*
- * check_options must be called before setup_link to set up
- * hw->fc completely
- */
- //ixgbe_check_options(adapter);
-
-#ifndef NO_VNIC
- /* reset_hw fills in the perm_addr as well */
- hw->phy.reset_if_overtemp = true;
- err = hw->mac.ops.reset_hw(hw);
- hw->phy.reset_if_overtemp = false;
- if (err == IXGBE_ERR_SFP_NOT_PRESENT &&
- hw->mac.type == ixgbe_mac_82598EB) {
- err = 0;
- } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
- e_dev_err("failed to load because an unsupported SFP+ "
- "module type was detected.\n");
- e_dev_err("Reload the driver after installing a supported "
- "module.\n");
- goto err_sw_init;
- } else if (err) {
- e_dev_err("HW Init failed: %d\n", err);
- goto err_sw_init;
- }
-#endif
-
- //if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
- // ixgbe_probe_vf(adapter);
-
-
-#ifdef MAX_SKB_FRAGS
- netdev->features |= NETIF_F_SG |
- NETIF_F_IP_CSUM;
-
-#ifdef NETIF_F_IPV6_CSUM
- netdev->features |= NETIF_F_IPV6_CSUM;
-#endif
-
-#ifdef NETIF_F_HW_VLAN_TX
- netdev->features |= NETIF_F_HW_VLAN_TX |
- NETIF_F_HW_VLAN_RX;
-#endif
-#ifdef NETIF_F_TSO
- netdev->features |= NETIF_F_TSO;
-#endif /* NETIF_F_TSO */
-#ifdef NETIF_F_TSO6
- netdev->features |= NETIF_F_TSO6;
-#endif /* NETIF_F_TSO6 */
-#ifdef NETIF_F_RXHASH
- netdev->features |= NETIF_F_RXHASH;
-#endif /* NETIF_F_RXHASH */
-
-#ifdef HAVE_NDO_SET_FEATURES
- netdev->features |= NETIF_F_RXCSUM;
-
- /* copy netdev features into list of user selectable features */
- netdev->hw_features |= netdev->features;
-
- /* give us the option of enabling RSC/LRO later */
-#ifdef IXGBE_NO_LRO
- if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
-#endif
- netdev->hw_features |= NETIF_F_LRO;
-
-#else
-#ifdef NETIF_F_GRO
-
- /* this is only needed on kernels prior to 2.6.39 */
- netdev->features |= NETIF_F_GRO;
-#endif /* NETIF_F_GRO */
-#endif
-
-#ifdef NETIF_F_HW_VLAN_TX
- /* set this bit last since it cannot be part of hw_features */
- netdev->features |= NETIF_F_HW_VLAN_FILTER;
-#endif
- switch (adapter->hw.mac.type) {
- case ixgbe_mac_82599EB:
- case ixgbe_mac_X540:
- netdev->features |= NETIF_F_SCTP_CSUM;
-#ifdef HAVE_NDO_SET_FEATURES
- netdev->hw_features |= NETIF_F_SCTP_CSUM |
- NETIF_F_NTUPLE;
-#endif
- break;
- default:
- break;
- }
-
-#ifdef HAVE_NETDEV_VLAN_FEATURES
- netdev->vlan_features |= NETIF_F_SG |
- NETIF_F_IP_CSUM |
- NETIF_F_IPV6_CSUM |
- NETIF_F_TSO |
- NETIF_F_TSO6;
-
-#endif /* HAVE_NETDEV_VLAN_FEATURES */
- /*
- * If perfect filters were enabled in check_options(), enable them
- * on the netdevice too.
- */
- if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
- netdev->features |= NETIF_F_NTUPLE;
- if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
- adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
- adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
- if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
- adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
- /* clear n-tuple support in the netdev unconditionally */
- netdev->features &= ~NETIF_F_NTUPLE;
- }
-
-#ifdef NETIF_F_RXHASH
- if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
- netdev->features &= ~NETIF_F_RXHASH;
-
-#endif /* NETIF_F_RXHASH */
- if (netdev->features & NETIF_F_LRO) {
- if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
- ((adapter->rx_itr_setting == 1) ||
- (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) {
- adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
- } else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
-#ifdef IXGBE_NO_LRO
- e_info(probe, "InterruptThrottleRate set too high, "
- "disabling RSC\n");
-#else
- e_info(probe, "InterruptThrottleRate set too high, "
- "falling back to software LRO\n");
-#endif
- }
- }
-#ifdef CONFIG_DCB
- //netdev->dcbnl_ops = &dcbnl_ops;
-#endif
-
-#ifdef IXGBE_FCOE
-#ifdef NETIF_F_FSO
- if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
- ixgbe_get_device_caps(hw, &device_caps);
- if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) {
- adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
- adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
- e_info(probe, "FCoE offload feature is not available. "
- "Disabling FCoE offload feature\n");
- }
-#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
- else {
- adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
- adapter->ring_feature[RING_F_FCOE].indices =
- IXGBE_FCRETA_SIZE;
- netdev->features |= NETIF_F_FSO |
- NETIF_F_FCOE_CRC |
- NETIF_F_FCOE_MTU;
- netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
- }
-#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
-#ifdef HAVE_NETDEV_VLAN_FEATURES
- netdev->vlan_features |= NETIF_F_FSO |
- NETIF_F_FCOE_CRC |
- NETIF_F_FCOE_MTU;
-#endif /* HAVE_NETDEV_VLAN_FEATURES */
- }
-#endif /* NETIF_F_FSO */
-#endif /* IXGBE_FCOE */
-
-#endif /* MAX_SKB_FRAGS */
- /* make sure the EEPROM is good */
- if (hw->eeprom.ops.validate_checksum &&
- (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) {
- e_dev_err("The EEPROM Checksum Is Not Valid\n");
- err = -EIO;
- goto err_sw_init;
- }
-
- memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
-#ifdef ETHTOOL_GPERMADDR
- memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
-
- if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
- e_dev_err("invalid MAC address\n");
- err = -EIO;
- goto err_sw_init;
- }
-#else
- if (ixgbe_validate_mac_addr(netdev->dev_addr)) {
- e_dev_err("invalid MAC address\n");
- err = -EIO;
- goto err_sw_init;
- }
-#endif
- memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
- netdev->addr_len);
- adapter->mac_table[0].queue = adapter->num_vfs;
- adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
- IXGBE_MAC_STATE_IN_USE);
- hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
- adapter->mac_table[0].queue,
- IXGBE_RAH_AV);
-
- //setup_timer(&adapter->service_timer, &ixgbe_service_timer,
- // (unsigned long) adapter);
-
- //INIT_WORK(&adapter->service_task, ixgbe_service_task);
- //clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
-
- //err = ixgbe_init_interrupt_scheme(adapter);
- //if (err)
- // goto err_sw_init;
-
- //adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
- ixgbe_set_num_queues(adapter);
-
- adapter->wol = 0;
-	/* WOL is not supported on all devices; only the following support it */
- switch (pdev->device) {
- case IXGBE_DEV_ID_82599_SFP:
-		/* Only these subdevices support WOL */
- switch (pdev->subsystem_device) {
- case IXGBE_SUBDEV_ID_82599_560FLR:
-			/* WOL is only supported on the first port */
-			if (hw->bus.func != 0)
-				break;
-			/* fall through */
-		case IXGBE_SUBDEV_ID_82599_SFP:
- adapter->wol = IXGBE_WUFC_MAG;
- break;
- }
- break;
- case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
- /* All except this subdevice support WOL */
- if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
- adapter->wol = IXGBE_WUFC_MAG;
- break;
- case IXGBE_DEV_ID_82599_KX4:
- adapter->wol = IXGBE_WUFC_MAG;
- break;
- case IXGBE_DEV_ID_X540T:
- /* Check eeprom to see if it is enabled */
- ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap);
- wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
-
- if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
- ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
- (hw->bus.func == 0)))
- adapter->wol = IXGBE_WUFC_MAG;
- break;
- }
- //device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
-
-
- /*
- * Save off EEPROM version number and Option Rom version which
-	 * together make a unique identifier for the eeprom
- */
- ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh);
- ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl);
-
- etrack_id = (eeprom_verh << 16) | eeprom_verl;
-
- ixgbe_read_eeprom(hw, 0x17, &offset);
-
- /* Make sure offset to SCSI block is valid */
- if (!(offset == 0x0) && !(offset == 0xffff)) {
- ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh);
- ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl);
-
-		/* Only display the Option ROM version if it exists */
- if (eeprom_cfg_blkl && eeprom_cfg_blkh) {
- major = eeprom_cfg_blkl >> 8;
- build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8);
- patch = eeprom_cfg_blkh & 0x00ff;
-
- snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
- "0x%08x, %d.%d.%d", etrack_id, major, build,
- patch);
- } else {
- snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
- "0x%08x", etrack_id);
- }
- } else {
- snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
- "0x%08x", etrack_id);
- }
-
- /* reset the hardware with the new settings */
- err = hw->mac.ops.start_hw(hw);
- if (err == IXGBE_ERR_EEPROM_VERSION) {
- /* We are running on a pre-production device, log a warning */
- e_dev_warn("This device is a pre-production adapter/LOM. "
- "Please be aware there may be issues associated "
- "with your hardware. If you are experiencing "
- "problems please contact your Intel or hardware "
- "representative who provided you with this "
- "hardware.\n");
- }
- /* pick up the PCI bus settings for reporting later */
- if (hw->mac.ops.get_bus_info)
- hw->mac.ops.get_bus_info(hw);
-
- strlcpy(netdev->name, "eth%d", sizeof(netdev->name));
- *lad_dev = netdev;
-
- adapter->netdev_registered = true;
-#ifdef NO_VNIC
- /* power down the optics */
- if ((hw->phy.multispeed_fiber) ||
- ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
- (hw->mac.type == ixgbe_mac_82599EB)))
- ixgbe_disable_tx_laser(hw);
-
- /* carrier off reporting is important to ethtool even BEFORE open */
- netif_carrier_off(netdev);
- /* keep stopping all the transmit queues for older kernels */
- netif_tx_stop_all_queues(netdev);
-#endif
-
- /* print all messages at the end so that we use our eth%d name */
- /* print bus type/speed/width info */
- e_dev_info("(PCI Express:%s:%s) ",
- (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
- hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" :
- "Unknown"),
- (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
- hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
- hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" :
- "Unknown"));
-
- /* print the MAC address */
- for (i = 0; i < 6; i++)
- pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
-
- /* First try to read PBA as a string */
- err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH);
- if (err)
- strlcpy(part_str, "Unknown", sizeof(part_str));
- if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
- e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
- hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str);
- else
- e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n",
- hw->mac.type, hw->phy.type, part_str);
-
- if (((hw->bus.speed == ixgbe_bus_speed_2500) &&
- (hw->bus.width <= ixgbe_bus_width_pcie_x4)) ||
- (hw->bus.width <= ixgbe_bus_width_pcie_x2)) {
- e_dev_warn("PCI-Express bandwidth available for this "
- "card is not sufficient for optimal "
- "performance.\n");
- e_dev_warn("For optimal performance a x8 PCI-Express "
- "slot is required.\n");
- }
-
-#define INFO_STRING_LEN 255
- info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
- if (!info_string) {
- e_err(probe, "allocation for info string failed\n");
- goto no_info_string;
- }
- count = 0;
- i_s_var = info_string;
- count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: ");
-
- i_s_var = info_string + count;
- count += snprintf(i_s_var, (INFO_STRING_LEN - count),
- "RxQ: %d TxQ: %d ", adapter->num_rx_queues,
- adapter->num_tx_queues);
- i_s_var = info_string + count;
-#ifdef IXGBE_FCOE
- if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE ");
- i_s_var = info_string + count;
- }
-#endif
- if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count,
- "FdirHash ");
- i_s_var = info_string + count;
- }
- if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count,
- "FdirPerfect ");
- i_s_var = info_string + count;
- }
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB ");
- i_s_var = info_string + count;
- }
- if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS ");
- i_s_var = info_string + count;
- }
- if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA ");
- i_s_var = info_string + count;
- }
- if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC ");
- i_s_var = info_string + count;
- }
-#ifndef IXGBE_NO_LRO
- else if (netdev->features & NETIF_F_LRO) {
- count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO ");
- i_s_var = info_string + count;
- }
-#endif
-
- BUG_ON(i_s_var > (info_string + INFO_STRING_LEN));
- /* end features printing */
- e_info(probe, "%s\n", info_string);
- kfree(info_string);
-no_info_string:
-
- /* firmware requires blank driver version */
- ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF);
-
-#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN)
- /* add san mac addr to netdev */
- //ixgbe_add_sanmac_netdev(netdev);
-
-#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */
- e_info(probe, "Intel(R) 10 Gigabit Network Connection\n");
- cards_found++;
-
-#ifdef IXGBE_SYSFS
- //if (ixgbe_sysfs_init(adapter))
- // e_err(probe, "failed to allocate sysfs resources\n");
-#else
-#ifdef IXGBE_PROCFS
- //if (ixgbe_procfs_init(adapter))
- // e_err(probe, "failed to allocate procfs resources\n");
-#endif /* IXGBE_PROCFS */
-#endif /* IXGBE_SYSFS */
-
- return 0;
-
-//err_register:
- //ixgbe_clear_interrupt_scheme(adapter);
- //ixgbe_release_hw_control(adapter);
-err_sw_init:
- adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
-	kfree(adapter->mac_table);
- iounmap(hw->hw_addr);
-err_ioremap:
- free_netdev(netdev);
-err_alloc_etherdev:
- //pci_release_selected_regions(pdev,
- // pci_select_bars(pdev, IORESOURCE_MEM));
-//err_pci_reg:
-//err_dma:
- pci_disable_device(pdev);
- return err;
-}
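/*
 * The probe path above builds a unique EEPROM identifier from two 16-bit
 * version words (etrack_id) and, when an Option ROM block is present, splits
 * its two configuration words into major/build/patch. A standalone sketch of
 * that bit manipulation; the word values are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t eeprom_verh = 0x8000, eeprom_verl = 0x01ab;	/* illustrative */
	uint16_t cfg_blkl = 0x0412, cfg_blkh = 0x3407;		/* illustrative */

	uint32_t etrack_id = ((uint32_t)eeprom_verh << 16) | eeprom_verl;
	unsigned int major = cfg_blkl >> 8;
	unsigned int build = ((cfg_blkl << 8) | (cfg_blkh >> 8)) & 0xffff;
	unsigned int patch = cfg_blkh & 0x00ff;

	printf("0x%08x, %u.%u.%u\n", etrack_id, major, build, patch);
	return 0;
}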
-
-/**
- * ixgbe_kni_remove - Device Removal Routine
- * @pdev: PCI device information struct
- *
- * ixgbe_kni_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device. This could be caused by a
- * Hot-Plug event, or because the driver is going to be removed from
- * memory.
- **/
-void ixgbe_kni_remove(struct pci_dev *pdev)
-{
- pci_disable_device(pdev);
-}
-
-
-u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
-{
- u16 value;
- struct ixgbe_adapter *adapter = hw->back;
-
- pci_read_config_word(adapter->pdev, reg, &value);
- return value;
-}
-
-void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
-{
- struct ixgbe_adapter *adapter = hw->back;
-
- pci_write_config_word(adapter->pdev, reg, value);
-}
-
-void ewarn(struct ixgbe_hw *hw, const char *st, u32 status)
-{
- struct ixgbe_adapter *adapter = hw->back;
-
- netif_warn(adapter, drv, adapter->netdev, "%s", st);
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
deleted file mode 100644
index 53ace941..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_MBX_H_
-#define _IXGBE_MBX_H_
-
-#include "ixgbe_type.h"
-
-#define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
-#define IXGBE_ERR_MBX -100
-
-#define IXGBE_VFMAILBOX 0x002FC
-#define IXGBE_VFMBMEM 0x00200
-
-/* Define mailbox register bits */
-#define IXGBE_VFMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */
-#define IXGBE_VFMAILBOX_ACK 0x00000002 /* Ack PF message received */
-#define IXGBE_VFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
-#define IXGBE_VFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
-#define IXGBE_VFMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */
-#define IXGBE_VFMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */
-#define IXGBE_VFMAILBOX_RSTI 0x00000040 /* PF has reset indication */
-#define IXGBE_VFMAILBOX_RSTD 0x00000080 /* PF has indicated reset done */
-#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */
-
-#define IXGBE_PFMAILBOX_STS 0x00000001 /* Initiate message send to VF */
-#define IXGBE_PFMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
-#define IXGBE_PFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
-#define IXGBE_PFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
-#define IXGBE_PFMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
-
-#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */
-#define IXGBE_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
-#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */
-#define IXGBE_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
-
-
-/* If it's an IXGBE_VF_* msg then it originates in the VF and is sent to the
- * PF. The reverse is true if it is IXGBE_PF_*.
- * Message ACKs are the value or'd with 0xF0000000
- */
-#define IXGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with
- * this are the ACK */
-#define IXGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with
- * this are the NACK */
-#define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still
- * clear to send requests */
-#define IXGBE_VT_MSGINFO_SHIFT 16
-/* bits 23:16 are used for extra info for certain messages */
-#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT)
-
-#define IXGBE_VF_RESET 0x01 /* VF requests reset */
-#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */
-#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */
-#define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */
-#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */
-#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */
-
-/* length of permanent address message returned from PF */
-#define IXGBE_VF_PERMADDR_MSG_LEN 4
-/* word in permanent address message with the current multicast type */
-#define IXGBE_VF_MC_TYPE_WORD 3
-
-#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */
-
-
-#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
-#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
-
-s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
-s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16);
-s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16);
-s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16);
-void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw);
-void ixgbe_init_mbx_params_vf(struct ixgbe_hw *);
-void ixgbe_init_mbx_params_pf(struct ixgbe_hw *);
-
-#endif /* _IXGBE_MBX_H_ */
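/*
 * A hedged sketch of the mailbox word layout defined above: the message type
 * sits in the low byte, per-message extra info in bits 23:16, and the ACK /
 * NACK / CTS status in the top bits. The constants mirror the IXGBE_VT_* and
 * IXGBE_VF_* values above; whether the PF replies exactly this way is outside
 * this header, the snippet only demonstrates the bit layout.
 */
#include <stdio.h>
#include <stdint.h>

#define VT_MSGTYPE_ACK		0x80000000u
#define VT_MSGINFO_SHIFT	16
#define VT_MSGINFO_MASK		(0xFFu << VT_MSGINFO_SHIFT)
#define VF_SET_MACVLAN		0x06u

int main(void)
{
	/* VF request for a unicast filter, carrying index 3 in the info bits */
	uint32_t msg = VF_SET_MACVLAN | (3u << VT_MSGINFO_SHIFT);

	/* a reply acknowledging the request: the message or'd with ACK */
	uint32_t reply = msg | VT_MSGTYPE_ACK;

	printf("type=0x%02x info=%u acked=%s\n",
	       (unsigned int)(msg & 0xFFu),
	       (unsigned int)((msg & VT_MSGINFO_MASK) >> VT_MSGINFO_SHIFT),
	       (reply & VT_MSGTYPE_ACK) ? "yes" : "no");
	return 0;
}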
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
deleted file mode 100644
index 7b3f8c51..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-/* glue for the OS independent part of ixgbe
- * includes register access macros
- */
-
-#ifndef _IXGBE_OSDEP_H_
-#define _IXGBE_OSDEP_H_
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/sched.h>
-#include "kcompat.h"
-
-
-#ifndef msleep
-#define msleep(x) do { if (in_interrupt()) { \
- /* Don't mdelay in interrupt context! */ \
- BUG(); \
- } else { \
- msleep(x); \
- } } while (0)
-
-#endif
-
-#undef ASSERT
-
-#ifdef DBG
-#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A)
-#else
-#define hw_dbg(hw, S, A...) do {} while (0)
-#endif
-
-#define e_dev_info(format, arg...) \
- dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg)
-#define e_dev_warn(format, arg...) \
- dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg)
-#define e_dev_err(format, arg...) \
- dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg)
-#define e_dev_notice(format, arg...) \
- dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg)
-#define e_info(msglvl, format, arg...) \
- netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
-#define e_err(msglvl, format, arg...) \
- netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
-#define e_warn(msglvl, format, arg...) \
- netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
-#define e_crit(msglvl, format, arg...) \
- netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
-
-
-#ifdef DBG
-#define IXGBE_WRITE_REG(a, reg, value) do {\
- switch (reg) { \
- case IXGBE_EIMS: \
- case IXGBE_EIMC: \
- case IXGBE_EIAM: \
- case IXGBE_EIAC: \
- case IXGBE_EICR: \
- case IXGBE_EICS: \
- printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \
- reg, (u32)(value)); \
- default: \
- break; \
- } \
- writel((value), ((a)->hw_addr + (reg))); \
-} while (0)
-#else
-#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
-#endif
-
-#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg))
-
-#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \
- writel((value), ((a)->hw_addr + (reg) + ((offset) << 2))))
-
-#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \
- readl((a)->hw_addr + (reg) + ((offset) << 2)))
-
-#ifndef writeq
-#define writeq(val, addr) do { writel((u32) (val), addr); \
-			writel((u32) (val >> 32), (addr + 4)); \
-			} while (0)
-#endif
-
-#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
-
-#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS)
-struct ixgbe_hw;
-extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg);
-extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value);
-extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status);
-
-#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word
-#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word
-#define IXGBE_EEPROM_GRANT_ATTEMPS 100
-#define IXGBE_HTONL(_i) htonl(_i)
-#define IXGBE_NTOHL(_i) ntohl(_i)
-#define IXGBE_NTOHS(_i) ntohs(_i)
-#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i)
-#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i)
-#define EWARN(H, W, S) ewarn(H, W, S)
-
-#endif /* _IXGBE_OSDEP_H_ */
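/*
 * The macros above wrap readl()/writel() over the ioremapped BAR0 and post
 * pending writes by reading IXGBE_STATUS (IXGBE_WRITE_FLUSH). The userspace
 * mock below only mimics that access pattern so it can run outside the
 * kernel; the register offsets are illustrative and the MOCK_* helpers are
 * not the driver's real macros.
 */
#include <stdio.h>
#include <stdint.h>

struct mock_hw {
	uint32_t *hw_addr;		/* stand-in for the ioremapped BAR0 */
};

#define MOCK_READ_REG(a, reg)		((a)->hw_addr[(reg) / 4])
#define MOCK_WRITE_REG(a, reg, val)	((a)->hw_addr[(reg) / 4] = (val))

#define REG_STATUS	0x00008		/* IXGBE_STATUS offset                */
#define REG_EIMS	0x00880		/* illustrative interrupt mask offset */

static uint32_t fake_bar0[0x1000 / 4];	/* pretend register file */

int main(void)
{
	struct mock_hw hw = { .hw_addr = fake_bar0 };
	uint32_t eims;

	/* read-modify-write, then flush by reading STATUS, as the driver does */
	eims = MOCK_READ_REG(&hw, REG_EIMS);
	MOCK_WRITE_REG(&hw, REG_EIMS, eims | 0x1u);
	(void)MOCK_READ_REG(&hw, REG_STATUS);

	printf("EIMS now 0x%08x\n", MOCK_READ_REG(&hw, REG_EIMS));
	return 0;
}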
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
deleted file mode 100644
index a47a2ff8..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
+++ /dev/null
@@ -1,1832 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_api.h"
-#include "ixgbe_common.h"
-#include "ixgbe_phy.h"
-
-static void ixgbe_i2c_start(struct ixgbe_hw *hw);
-static void ixgbe_i2c_stop(struct ixgbe_hw *hw);
-static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
-static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
-static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
-static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
-static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
-static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
-static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
-static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
-static bool ixgbe_get_i2c_data(u32 *i2cctl);
-
-/**
- * ixgbe_init_phy_ops_generic - Inits PHY function ptrs
- * @hw: pointer to the hardware structure
- *
- * Initialize the function pointers.
- **/
-s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
-{
- struct ixgbe_phy_info *phy = &hw->phy;
-
- /* PHY */
- phy->ops.identify = &ixgbe_identify_phy_generic;
- phy->ops.reset = &ixgbe_reset_phy_generic;
- phy->ops.read_reg = &ixgbe_read_phy_reg_generic;
- phy->ops.write_reg = &ixgbe_write_phy_reg_generic;
- phy->ops.setup_link = &ixgbe_setup_phy_link_generic;
- phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic;
- phy->ops.check_link = NULL;
- phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic;
- phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic;
- phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic;
- phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic;
- phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic;
- phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear;
- phy->ops.identify_sfp = &ixgbe_identify_module_generic;
- phy->sfp_type = ixgbe_sfp_type_unknown;
- phy->ops.check_overtemp = &ixgbe_tn_check_overtemp;
- return 0;
-}
-
-/**
- * ixgbe_identify_phy_generic - Get physical layer module
- * @hw: pointer to hardware structure
- *
- * Determines the physical layer module found on the current adapter.
- **/
-s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
- u32 phy_addr;
- u16 ext_ability = 0;
-
- if (hw->phy.type == ixgbe_phy_unknown) {
- for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
- if (ixgbe_validate_phy_addr(hw, phy_addr)) {
- hw->phy.addr = phy_addr;
- ixgbe_get_phy_id(hw);
- hw->phy.type =
- ixgbe_get_phy_type_from_id(hw->phy.id);
-
- if (hw->phy.type == ixgbe_phy_unknown) {
- hw->phy.ops.read_reg(hw,
- IXGBE_MDIO_PHY_EXT_ABILITY,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- &ext_ability);
- if (ext_ability &
- (IXGBE_MDIO_PHY_10GBASET_ABILITY |
- IXGBE_MDIO_PHY_1000BASET_ABILITY))
- hw->phy.type =
- ixgbe_phy_cu_unknown;
- else
- hw->phy.type =
- ixgbe_phy_generic;
- }
-
- status = 0;
- break;
- }
- }
- /* clear value if nothing found */
- if (status != 0)
- hw->phy.addr = 0;
- } else {
- status = 0;
- }
-
- return status;
-}
-
-/**
- * ixgbe_validate_phy_addr - Determines whether the PHY address is valid
- * @hw: pointer to hardware structure
- * @phy_addr: PHY address to probe
- **/
-bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr)
-{
- u16 phy_id = 0;
- bool valid = false;
-
- hw->phy.addr = phy_addr;
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id);
-
- if (phy_id != 0xFFFF && phy_id != 0x0)
- valid = true;
-
- return valid;
-}
-
-/**
- * ixgbe_get_phy_id - Get the PHY ID and revision
- * @hw: pointer to hardware structure
- *
- **/
-s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
-{
- u32 status;
- u16 phy_id_high = 0;
- u16 phy_id_low = 0;
-
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- &phy_id_high);
-
- if (status == 0) {
- hw->phy.id = (u32)(phy_id_high << 16);
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- &phy_id_low);
- hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
- hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
- }
- return status;
-}
-
-/**
- * ixgbe_get_phy_type_from_id - Get the PHY type from its ID
- * @phy_id: PHY ID value read from the PHY identification registers
- *
- **/
-enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
-{
- enum ixgbe_phy_type phy_type;
-
- switch (phy_id) {
- case TN1010_PHY_ID:
- phy_type = ixgbe_phy_tn;
- break;
- case X540_PHY_ID:
- phy_type = ixgbe_phy_aq;
- break;
- case QT2022_PHY_ID:
- phy_type = ixgbe_phy_qt;
- break;
- case ATH_PHY_ID:
- phy_type = ixgbe_phy_nl;
- break;
- default:
- phy_type = ixgbe_phy_unknown;
- break;
- }
-
- hw_dbg(hw, "phy type found is %d\n", phy_type);
- return phy_type;
-}
-
-/**
- * ixgbe_reset_phy_generic - Performs a PHY reset
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
-{
- u32 i;
- u16 ctrl = 0;
- s32 status = 0;
-
- if (hw->phy.type == ixgbe_phy_unknown)
- status = ixgbe_identify_phy_generic(hw);
-
- if (status != 0 || hw->phy.type == ixgbe_phy_none)
- goto out;
-
- /* Don't reset PHY if it's shut down due to overtemp. */
- if (!hw->phy.reset_if_overtemp &&
- (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw)))
- goto out;
-
- /*
- * Perform soft PHY reset to the PHY_XS.
- * This will cause a soft reset to the PHY
- */
- hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
- IXGBE_MDIO_PHY_XS_DEV_TYPE,
- IXGBE_MDIO_PHY_XS_RESET);
-
- /*
- * Poll for reset bit to self-clear indicating reset is complete.
- * Some PHYs could take up to 3 seconds to complete and need about
- * 1.7 usec delay after the reset is complete.
- */
- for (i = 0; i < 30; i++) {
- msleep(100);
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
- IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl);
- if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) {
- udelay(2);
- break;
- }
- }
-
- if (ctrl & IXGBE_MDIO_PHY_XS_RESET) {
- status = IXGBE_ERR_RESET_FAILED;
- hw_dbg(hw, "PHY reset polling failed to complete.\n");
- }
-
-out:
- return status;
-}
-
-/**
- * ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register
- * @hw: pointer to hardware structure
- * @reg_addr: 32 bit address of PHY register to read
- * @device_type: 5 bit device type
- * @phy_data: Pointer to read data from PHY register
- **/
-s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
- u32 device_type, u16 *phy_data)
-{
- u32 command;
- u32 i;
- u32 data;
- s32 status = 0;
- u16 gssr;
-
- if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
- gssr = IXGBE_GSSR_PHY1_SM;
- else
- gssr = IXGBE_GSSR_PHY0_SM;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
- status = IXGBE_ERR_SWFW_SYNC;
-
- if (status == 0) {
- /* Setup and write the address cycle command */
- command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
- (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
- (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
- (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
-
- IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
-
- /*
- * Check every 10 usec to see if the address cycle completed.
- * The MDI Command bit will clear when the operation is
- * complete
- */
- for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
- udelay(10);
-
- command = IXGBE_READ_REG(hw, IXGBE_MSCA);
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
- break;
- }
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
- hw_dbg(hw, "PHY address command did not complete.\n");
- status = IXGBE_ERR_PHY;
- }
-
- if (status == 0) {
- /*
- * Address cycle complete, setup and write the read
- * command
- */
- command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
- (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
- (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
- (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND));
-
- IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
-
- /*
- * Check every 10 usec to see if the address cycle
- * completed. The MDI Command bit will clear when the
- * operation is complete
- */
- for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
- udelay(10);
-
- command = IXGBE_READ_REG(hw, IXGBE_MSCA);
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
- break;
- }
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
- hw_dbg(hw, "PHY read command didn't complete\n");
- status = IXGBE_ERR_PHY;
- } else {
- /*
- * Read operation is complete. Get the data
- * from MSRWD
- */
- data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
- data >>= IXGBE_MSRWD_READ_DATA_SHIFT;
- *phy_data = (u16)(data);
- }
- }
-
- hw->mac.ops.release_swfw_sync(hw, gssr);
- }
-
- return status;
-}
-
-/**
- * ixgbe_write_phy_reg_generic - Writes a value to specified PHY register
- * @hw: pointer to hardware structure
- * @reg_addr: 32 bit PHY register to write
- * @device_type: 5 bit device type
- * @phy_data: Data to write to the PHY register
- **/
-s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
- u32 device_type, u16 phy_data)
-{
- u32 command;
- u32 i;
- s32 status = 0;
- u16 gssr;
-
- if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
- gssr = IXGBE_GSSR_PHY1_SM;
- else
- gssr = IXGBE_GSSR_PHY0_SM;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
- status = IXGBE_ERR_SWFW_SYNC;
-
- if (status == 0) {
- /* Put the data in the MDI single read and write data register*/
- IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data);
-
- /* Setup and write the address cycle command */
- command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
- (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
- (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
- (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
-
- IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
-
- /*
- * Check every 10 usec to see if the address cycle completed.
- * The MDI Command bit will clear when the operation is
- * complete
- */
- for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
- udelay(10);
-
- command = IXGBE_READ_REG(hw, IXGBE_MSCA);
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
- break;
- }
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
- hw_dbg(hw, "PHY address cmd didn't complete\n");
- status = IXGBE_ERR_PHY;
- }
-
- if (status == 0) {
- /*
- * Address cycle complete, setup and write the write
- * command
- */
- command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) |
- (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
- (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
- (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND));
-
- IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
-
- /*
- * Check every 10 usec to see if the address cycle
- * completed. The MDI Command bit will clear when the
- * operation is complete
- */
- for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
- udelay(10);
-
- command = IXGBE_READ_REG(hw, IXGBE_MSCA);
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
- break;
- }
-
- if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
- hw_dbg(hw, "PHY address cmd didn't complete\n");
- status = IXGBE_ERR_PHY;
- }
- }
-
- hw->mac.ops.release_swfw_sync(hw, gssr);
- }
-
- return status;
-}
-
-/**
- * ixgbe_setup_phy_link_generic - Set and restart autoneg
- * @hw: pointer to hardware structure
- *
- * Restarts autonegotiation on the PHY and waits for it to complete.
- **/
-s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u32 time_out;
- u32 max_time_out = 10;
- u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
- bool autoneg = false;
- ixgbe_link_speed speed;
-
- ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
-
- if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
- /* Set or unset auto-negotiation 10G advertisement */
- hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
- autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
-
- hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
- /* Set or unset auto-negotiation 1G advertisement */
- hw->phy.ops.read_reg(hw,
- IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
- autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
-
- hw->phy.ops.write_reg(hw,
- IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- if (speed & IXGBE_LINK_SPEED_100_FULL) {
- /* Set or unset auto-negotiation 100M advertisement */
- hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE |
- IXGBE_MII_100BASE_T_ADVERTISE_HALF);
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
- autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
-
- hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- /* Restart PHY autonegotiation and wait for completion */
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
-
- autoneg_reg |= IXGBE_MII_RESTART;
-
- hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
-
- /* Wait for autonegotiation to finish */
- for (time_out = 0; time_out < max_time_out; time_out++) {
- udelay(10);
- /* Restart PHY autonegotiation and wait for completion */
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
- if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
- break;
- }
-
- if (time_out == max_time_out) {
- status = IXGBE_ERR_LINK_SETUP;
- hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out");
- }
-
- return status;
-}
-
-/**
- * ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation is enabled
- * @autoneg_wait_to_complete: true to wait for autonegotiation to complete
- **/
-s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete)
-{
-
- /*
- * Clear autoneg_advertised and set new values based on input link
- * speed.
- */
- hw->phy.autoneg_advertised = 0;
-
- if (speed & IXGBE_LINK_SPEED_10GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
-
- if (speed & IXGBE_LINK_SPEED_100_FULL)
- hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
-
- /* Setup link based on the new speed settings */
- hw->phy.ops.setup_link(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @autoneg: boolean auto-negotiation value
- *
- * Determines the link capabilities by reading the PHY speed ability register.
- **/
-s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *autoneg)
-{
- s32 status = IXGBE_ERR_LINK_SETUP;
- u16 speed_ability;
-
- *speed = 0;
- *autoneg = true;
-
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE,
- &speed_ability);
-
- if (status == 0) {
- if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G)
- *speed |= IXGBE_LINK_SPEED_10GB_FULL;
- if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G)
- *speed |= IXGBE_LINK_SPEED_1GB_FULL;
- if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M)
- *speed |= IXGBE_LINK_SPEED_100_FULL;
- }
-
- return status;
-}
-
-/**
- * ixgbe_check_phy_link_tnx - Determine link and speed status
- * @hw: pointer to hardware structure
- * @speed: pointer to the returned link speed
- * @link_up: true if link is up, false otherwise
- *
- * Reads the VS1 register to determine if link is up and the current speed for
- * the PHY.
- **/
-s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
- bool *link_up)
-{
- s32 status = 0;
- u32 time_out;
- u32 max_time_out = 10;
- u16 phy_link = 0;
- u16 phy_speed = 0;
- u16 phy_data = 0;
-
- /* Initialize speed and link to default case */
- *link_up = false;
- *speed = IXGBE_LINK_SPEED_10GB_FULL;
-
- /*
- * Check current speed and link status of the PHY register.
- * This is a vendor specific register and may have to
- * be changed for other copper PHYs.
- */
- for (time_out = 0; time_out < max_time_out; time_out++) {
- udelay(10);
- status = hw->phy.ops.read_reg(hw,
- IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS,
- IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
- &phy_data);
- phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS;
- phy_speed = phy_data &
- IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS;
- if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) {
- *link_up = true;
- if (phy_speed ==
- IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS)
- *speed = IXGBE_LINK_SPEED_1GB_FULL;
- break;
- }
- }
-
- return status;
-}
-
-/**
- * ixgbe_setup_phy_link_tnx - Set and restart autoneg
- * @hw: pointer to hardware structure
- *
- * Restarts autonegotiation on the PHY and waits for it to complete.
- **/
-s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u32 time_out;
- u32 max_time_out = 10;
- u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
- bool autoneg = false;
- ixgbe_link_speed speed;
-
- ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
-
- if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
- /* Set or unset auto-negotiation 10G advertisement */
- hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
- autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
-
- hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
- /* Set or unset auto-negotiation 1G advertisement */
- hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
- autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
-
- hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- if (speed & IXGBE_LINK_SPEED_100_FULL) {
- /* Set or unset auto-negotiation 100M advertisement */
- hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE;
- if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
- autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
-
- hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- autoneg_reg);
- }
-
- /* Restart PHY autonegotiation and wait for completion */
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
-
- autoneg_reg |= IXGBE_MII_RESTART;
-
- hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
-
- /* Wait for autonegotiation to finish */
- for (time_out = 0; time_out < max_time_out; time_out++) {
- udelay(10);
- /* Restart PHY autonegotiation and wait for completion */
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
- IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
- &autoneg_reg);
-
- autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
- if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
- break;
- }
-
- if (time_out == max_time_out) {
- status = IXGBE_ERR_LINK_SETUP;
- hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out");
- }
-
- return status;
-}
-
-/**
- * ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version
- * @hw: pointer to hardware structure
- * @firmware_version: pointer to the PHY Firmware Version
- **/
-s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
- u16 *firmware_version)
-{
- s32 status = 0;
-
- status = hw->phy.ops.read_reg(hw, TNX_FW_REV,
- IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
- firmware_version);
-
- return status;
-}
-
-/**
- * ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version
- * @hw: pointer to hardware structure
- * @firmware_version: pointer to the PHY Firmware Version
- **/
-s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
- u16 *firmware_version)
-{
- s32 status = 0;
-
- status = hw->phy.ops.read_reg(hw, AQ_FW_REV,
- IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
- firmware_version);
-
- return status;
-}
-
-/**
- * ixgbe_reset_phy_nl - Performs a PHY reset
- * @hw: pointer to hardware structure
- **/
-s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
-{
- u16 phy_offset, control, eword, edata, block_crc;
- bool end_data = false;
- u16 list_offset, data_offset;
- u16 phy_data = 0;
- s32 ret_val = 0;
- u32 i;
-
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
- IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
-
- /* reset the PHY and poll for completion */
- hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
- IXGBE_MDIO_PHY_XS_DEV_TYPE,
- (phy_data | IXGBE_MDIO_PHY_XS_RESET));
-
- for (i = 0; i < 100; i++) {
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
- IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
- if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0)
- break;
- msleep(10);
- }
-
- if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) {
- hw_dbg(hw, "PHY reset did not complete.\n");
- ret_val = IXGBE_ERR_PHY;
- goto out;
- }
-
- /* Get init offsets */
- ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
- &data_offset);
- if (ret_val != 0)
- goto out;
-
- ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc);
- data_offset++;
- while (!end_data) {
- /*
- * Read control word from PHY init contents offset
- */
- ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
- control = (eword & IXGBE_CONTROL_MASK_NL) >>
- IXGBE_CONTROL_SHIFT_NL;
- edata = eword & IXGBE_DATA_MASK_NL;
- switch (control) {
- case IXGBE_DELAY_NL:
- data_offset++;
- hw_dbg(hw, "DELAY: %d MS\n", edata);
- msleep(edata);
- break;
- case IXGBE_DATA_NL:
- hw_dbg(hw, "DATA:\n");
- data_offset++;
- hw->eeprom.ops.read(hw, data_offset++,
- &phy_offset);
- for (i = 0; i < edata; i++) {
- hw->eeprom.ops.read(hw, data_offset, &eword);
- hw->phy.ops.write_reg(hw, phy_offset,
- IXGBE_TWINAX_DEV, eword);
- hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
- phy_offset);
- data_offset++;
- phy_offset++;
- }
- break;
- case IXGBE_CONTROL_NL:
- data_offset++;
- hw_dbg(hw, "CONTROL:\n");
- if (edata == IXGBE_CONTROL_EOL_NL) {
- hw_dbg(hw, "EOL\n");
- end_data = true;
- } else if (edata == IXGBE_CONTROL_SOL_NL) {
- hw_dbg(hw, "SOL\n");
- } else {
- hw_dbg(hw, "Bad control value\n");
- ret_val = IXGBE_ERR_PHY;
- goto out;
- }
- break;
- default:
- hw_dbg(hw, "Bad control type\n");
- ret_val = IXGBE_ERR_PHY;
- goto out;
- }
- }
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_identify_module_generic - Identifies module type
- * @hw: pointer to hardware structure
- *
- * Determines the media type and calls the appropriate identify function.
- **/
-s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_SFP_NOT_PRESENT;
-
- switch (hw->mac.ops.get_media_type(hw)) {
- case ixgbe_media_type_fiber:
- status = ixgbe_identify_sfp_module_generic(hw);
- break;
-
- case ixgbe_media_type_fiber_qsfp:
- status = ixgbe_identify_qsfp_module_generic(hw);
- break;
-
- default:
- hw->phy.sfp_type = ixgbe_sfp_type_not_present;
- status = IXGBE_ERR_SFP_NOT_PRESENT;
- break;
- }
-
- return status;
-}
-
-/**
- * ixgbe_identify_sfp_module_generic - Identifies SFP modules
- * @hw: pointer to hardware structure
- *
- * Searches for and identifies the SFP module and assigns appropriate PHY type.
- **/
-s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
- u32 vendor_oui = 0;
- enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
- u8 identifier = 0;
- u8 comp_codes_1g = 0;
- u8 comp_codes_10g = 0;
- u8 oui_bytes[3] = {0, 0, 0};
- u8 cable_tech = 0;
- u8 cable_spec = 0;
- u16 enforce_sfp = 0;
-
- if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
- hw->phy.sfp_type = ixgbe_sfp_type_not_present;
- status = IXGBE_ERR_SFP_NOT_PRESENT;
- goto out;
- }
-
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_IDENTIFIER,
- &identifier);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- /* LAN ID is needed for sfp_type determination */
- hw->mac.ops.set_lan_id(hw);
-
- if (identifier != IXGBE_SFF_IDENTIFIER_SFP) {
- hw->phy.type = ixgbe_phy_sfp_unsupported;
- status = IXGBE_ERR_SFP_NOT_SUPPORTED;
- } else {
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_1GBE_COMP_CODES,
- &comp_codes_1g);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_10GBE_COMP_CODES,
- &comp_codes_10g);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_CABLE_TECHNOLOGY,
- &cable_tech);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- /* ID Module
- * =========
- * 0 SFP_DA_CU
- * 1 SFP_SR
- * 2 SFP_LR
- * 3 SFP_DA_CORE0 - 82599-specific
- * 4 SFP_DA_CORE1 - 82599-specific
- * 5 SFP_SR/LR_CORE0 - 82599-specific
- * 6 SFP_SR/LR_CORE1 - 82599-specific
- * 7 SFP_act_lmt_DA_CORE0 - 82599-specific
- * 8 SFP_act_lmt_DA_CORE1 - 82599-specific
- * 9 SFP_1g_cu_CORE0 - 82599-specific
- * 10 SFP_1g_cu_CORE1 - 82599-specific
- * 11 SFP_1g_sx_CORE0 - 82599-specific
- * 12 SFP_1g_sx_CORE1 - 82599-specific
- */
- if (hw->mac.type == ixgbe_mac_82598EB) {
- if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
- hw->phy.sfp_type = ixgbe_sfp_type_da_cu;
- else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
- hw->phy.sfp_type = ixgbe_sfp_type_sr;
- else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
- hw->phy.sfp_type = ixgbe_sfp_type_lr;
- else
- hw->phy.sfp_type = ixgbe_sfp_type_unknown;
- } else if (hw->mac.type == ixgbe_mac_82599EB) {
- if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type =
- ixgbe_sfp_type_da_cu_core0;
- else
- hw->phy.sfp_type =
- ixgbe_sfp_type_da_cu_core1;
- } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) {
- hw->phy.ops.read_i2c_eeprom(
- hw, IXGBE_SFF_CABLE_SPEC_COMP,
- &cable_spec);
- if (cable_spec &
- IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) {
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type =
- ixgbe_sfp_type_da_act_lmt_core0;
- else
- hw->phy.sfp_type =
- ixgbe_sfp_type_da_act_lmt_core1;
- } else {
- hw->phy.sfp_type =
- ixgbe_sfp_type_unknown;
- }
- } else if (comp_codes_10g &
- (IXGBE_SFF_10GBASESR_CAPABLE |
- IXGBE_SFF_10GBASELR_CAPABLE)) {
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type =
- ixgbe_sfp_type_srlr_core0;
- else
- hw->phy.sfp_type =
- ixgbe_sfp_type_srlr_core1;
- } else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) {
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type =
- ixgbe_sfp_type_1g_cu_core0;
- else
- hw->phy.sfp_type =
- ixgbe_sfp_type_1g_cu_core1;
- } else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) {
- if (hw->bus.lan_id == 0)
- hw->phy.sfp_type =
- ixgbe_sfp_type_1g_sx_core0;
- else
- hw->phy.sfp_type =
- ixgbe_sfp_type_1g_sx_core1;
- } else {
- hw->phy.sfp_type = ixgbe_sfp_type_unknown;
- }
- }
-
- if (hw->phy.sfp_type != stored_sfp_type)
- hw->phy.sfp_setup_needed = true;
-
- /* Determine if the SFP+ PHY is dual speed or not. */
- hw->phy.multispeed_fiber = false;
- if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
- (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
- ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
- (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)))
- hw->phy.multispeed_fiber = true;
-
- /* Determine PHY vendor */
- if (hw->phy.type != ixgbe_phy_nl) {
- hw->phy.id = identifier;
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_VENDOR_OUI_BYTE0,
- &oui_bytes[0]);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_VENDOR_OUI_BYTE1,
- &oui_bytes[1]);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- status = hw->phy.ops.read_i2c_eeprom(hw,
- IXGBE_SFF_VENDOR_OUI_BYTE2,
- &oui_bytes[2]);
-
- if (status == IXGBE_ERR_SWFW_SYNC ||
- status == IXGBE_ERR_I2C ||
- status == IXGBE_ERR_SFP_NOT_PRESENT)
- goto err_read_i2c_eeprom;
-
- vendor_oui =
- ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) |
- (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) |
- (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT));
-
- switch (vendor_oui) {
- case IXGBE_SFF_VENDOR_OUI_TYCO:
- if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
- hw->phy.type =
- ixgbe_phy_sfp_passive_tyco;
- break;
- case IXGBE_SFF_VENDOR_OUI_FTL:
- if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
- hw->phy.type = ixgbe_phy_sfp_ftl_active;
- else
- hw->phy.type = ixgbe_phy_sfp_ftl;
- break;
- case IXGBE_SFF_VENDOR_OUI_AVAGO:
- hw->phy.type = ixgbe_phy_sfp_avago;
- break;
- case IXGBE_SFF_VENDOR_OUI_INTEL:
- hw->phy.type = ixgbe_phy_sfp_intel;
- break;
- default:
- if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
- hw->phy.type =
- ixgbe_phy_sfp_passive_unknown;
- else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
- hw->phy.type =
- ixgbe_phy_sfp_active_unknown;
- else
- hw->phy.type = ixgbe_phy_sfp_unknown;
- break;
- }
- }
-
- /* Allow any DA cable vendor */
- if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE |
- IXGBE_SFF_DA_ACTIVE_CABLE)) {
- status = 0;
- goto out;
- }
-
- /* Verify supported 1G SFP modules */
- if (comp_codes_10g == 0 &&
- !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
- hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
- hw->phy.type = ixgbe_phy_sfp_unsupported;
- status = IXGBE_ERR_SFP_NOT_SUPPORTED;
- goto out;
- }
-
- /* Anything else 82598-based is supported */
- if (hw->mac.type == ixgbe_mac_82598EB) {
- status = 0;
- goto out;
- }
-
- ixgbe_get_device_caps(hw, &enforce_sfp);
- if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
- !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) ||
- (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) ||
- (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0) ||
- (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) {
- /* Make sure we're a supported PHY type */
- if (hw->phy.type == ixgbe_phy_sfp_intel) {
- status = 0;
- } else {
-				if (hw->allow_unsupported_sfp) {
- EWARN(hw, "WARNING: Intel (R) Network "
- "Connections are quality tested "
- "using Intel (R) Ethernet Optics."
- " Using untested modules is not "
- "supported and may cause unstable"
- " operation or damage to the "
- "module or the adapter. Intel "
- "Corporation is not responsible "
- "for any harm caused by using "
- "untested modules.\n", status);
- status = 0;
- } else {
- hw_dbg(hw, "SFP+ module not supported\n");
- hw->phy.type =
- ixgbe_phy_sfp_unsupported;
- status = IXGBE_ERR_SFP_NOT_SUPPORTED;
- }
- }
- } else {
- status = 0;
- }
- }
-
-out:
- return status;
-
-err_read_i2c_eeprom:
- hw->phy.sfp_type = ixgbe_sfp_type_not_present;
- if (hw->phy.type != ixgbe_phy_nl) {
- hw->phy.id = 0;
- hw->phy.type = ixgbe_phy_unknown;
- }
- return IXGBE_ERR_SFP_NOT_PRESENT;
-}
-
-/**
- * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules
- * @hw: pointer to hardware structure
- *
- * Searches for and identifies the QSFP module and assigns appropriate PHY type
- **/
-s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
-{
- s32 status = 0;
-
- if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) {
- hw->phy.sfp_type = ixgbe_sfp_type_not_present;
- status = IXGBE_ERR_SFP_NOT_PRESENT;
- }
-
- return status;
-}
-
-
-/**
- * ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence
- * @hw: pointer to hardware structure
- * @list_offset: offset to the SFP ID list
- * @data_offset: offset to the SFP data block
- *
- * Checks the MAC's EEPROM to see if it supports a given SFP+ module type; if
- * so, it returns the offsets to the PHY init sequence block.
- **/
-s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
- u16 *list_offset,
- u16 *data_offset)
-{
- u16 sfp_id;
- u16 sfp_type = hw->phy.sfp_type;
-
- if (hw->phy.sfp_type == ixgbe_sfp_type_unknown)
- return IXGBE_ERR_SFP_NOT_SUPPORTED;
-
- if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
- return IXGBE_ERR_SFP_NOT_PRESENT;
-
- if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) &&
- (hw->phy.sfp_type == ixgbe_sfp_type_da_cu))
- return IXGBE_ERR_SFP_NOT_SUPPORTED;
-
- /*
- * Limiting active cables and 1G Phys must be initialized as
- * SR modules
- */
- if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
- sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
- sfp_type == ixgbe_sfp_type_1g_sx_core0)
- sfp_type = ixgbe_sfp_type_srlr_core0;
- else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
- sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
- sfp_type == ixgbe_sfp_type_1g_sx_core1)
- sfp_type = ixgbe_sfp_type_srlr_core1;
-
- /* Read offset to PHY init contents */
- hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset);
-
- if ((!*list_offset) || (*list_offset == 0xFFFF))
- return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
-
- /* Shift offset to first ID word */
- (*list_offset)++;
-
- /*
- * Find the matching SFP ID in the EEPROM
- * and program the init sequence
- */
- hw->eeprom.ops.read(hw, *list_offset, &sfp_id);
-
- while (sfp_id != IXGBE_PHY_INIT_END_NL) {
- if (sfp_id == sfp_type) {
- (*list_offset)++;
- hw->eeprom.ops.read(hw, *list_offset, data_offset);
- if ((!*data_offset) || (*data_offset == 0xFFFF)) {
- hw_dbg(hw, "SFP+ module not supported\n");
- return IXGBE_ERR_SFP_NOT_SUPPORTED;
- } else {
- break;
- }
- } else {
- (*list_offset) += 2;
- if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
- return IXGBE_ERR_PHY;
- }
- }
-
- if (sfp_id == IXGBE_PHY_INIT_END_NL) {
- hw_dbg(hw, "No matching SFP+ module found\n");
- return IXGBE_ERR_SFP_NOT_SUPPORTED;
- }
-
- return 0;
-}
-
-/**
- * ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface
- * @hw: pointer to hardware structure
- * @byte_offset: EEPROM byte offset to read
- * @eeprom_data: value read
- *
- * Performs a byte read operation from the SFP module's EEPROM over the I2C interface.
- **/
-s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 *eeprom_data)
-{
- return hw->phy.ops.read_i2c_byte(hw, byte_offset,
- IXGBE_I2C_EEPROM_DEV_ADDR,
- eeprom_data);
-}
-
-/**
- * ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface
- * @hw: pointer to hardware structure
- * @byte_offset: EEPROM byte offset to write
- * @eeprom_data: value to write
- *
- * Performs byte write operation to SFP module's EEPROM over I2C interface.
- **/
-s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 eeprom_data)
-{
- return hw->phy.ops.write_i2c_byte(hw, byte_offset,
- IXGBE_I2C_EEPROM_DEV_ADDR,
- eeprom_data);
-}
-
-/**
- * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to read
- * @data: value read
- *
- * Performs a byte read operation from the SFP module's EEPROM over the I2C
- * interface at a specified device address.
- **/
-s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data)
-{
- s32 status = 0;
- u32 max_retry = 10;
- u32 retry = 0;
- u16 swfw_mask = 0;
- bool nack = 1;
- *data = 0;
-
- if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
- swfw_mask = IXGBE_GSSR_PHY1_SM;
- else
- swfw_mask = IXGBE_GSSR_PHY0_SM;
-
- do {
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
- != 0) {
- status = IXGBE_ERR_SWFW_SYNC;
- goto read_byte_out;
- }
-
- ixgbe_i2c_start(hw);
-
- /* Device Address and write indication */
- status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- ixgbe_i2c_start(hw);
-
- /* Device Address and read indication */
- status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1));
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- status = ixgbe_clock_in_i2c_byte(hw, data);
- if (status != 0)
- goto fail;
-
- status = ixgbe_clock_out_i2c_bit(hw, nack);
- if (status != 0)
- goto fail;
-
- ixgbe_i2c_stop(hw);
- break;
-
-fail:
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
- msleep(100);
- ixgbe_i2c_bus_clear(hw);
- retry++;
- if (retry < max_retry)
- hw_dbg(hw, "I2C byte read error - Retrying.\n");
- else
- hw_dbg(hw, "I2C byte read error.\n");
-
- } while (retry < max_retry);
-
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
-read_byte_out:
- return status;
-}
-
-/**
- * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
- * @hw: pointer to hardware structure
- * @byte_offset: byte offset to write
- * @data: value to write
- *
- * Performs byte write operation to SFP module's EEPROM over I2C interface at
- * a specified device address.
- **/
-s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data)
-{
- s32 status = 0;
- u32 max_retry = 1;
- u32 retry = 0;
- u16 swfw_mask = 0;
-
- if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
- swfw_mask = IXGBE_GSSR_PHY1_SM;
- else
- swfw_mask = IXGBE_GSSR_PHY0_SM;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) {
- status = IXGBE_ERR_SWFW_SYNC;
- goto write_byte_out;
- }
-
- do {
- ixgbe_i2c_start(hw);
-
- status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- status = ixgbe_clock_out_i2c_byte(hw, data);
- if (status != 0)
- goto fail;
-
- status = ixgbe_get_i2c_ack(hw);
- if (status != 0)
- goto fail;
-
- ixgbe_i2c_stop(hw);
- break;
-
-fail:
- ixgbe_i2c_bus_clear(hw);
- retry++;
- if (retry < max_retry)
- hw_dbg(hw, "I2C byte write error - Retrying.\n");
- else
- hw_dbg(hw, "I2C byte write error.\n");
- } while (retry < max_retry);
-
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
-write_byte_out:
- return status;
-}
-
-/**
- * ixgbe_i2c_start - Sets I2C start condition
- * @hw: pointer to hardware structure
- *
- * Sets I2C start condition (High -> Low on SDA while SCL is High)
- **/
-static void ixgbe_i2c_start(struct ixgbe_hw *hw)
-{
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
-
- /* Start condition must begin with data and clock high */
- ixgbe_set_i2c_data(hw, &i2cctl, 1);
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
- /* Setup time for start condition (4.7us) */
- udelay(IXGBE_I2C_T_SU_STA);
-
- ixgbe_set_i2c_data(hw, &i2cctl, 0);
-
- /* Hold time for start condition (4us) */
- udelay(IXGBE_I2C_T_HD_STA);
-
- ixgbe_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- udelay(IXGBE_I2C_T_LOW);
-
-}
-
-/**
- * ixgbe_i2c_stop - Sets I2C stop condition
- * @hw: pointer to hardware structure
- *
- * Sets I2C stop condition (Low -> High on SDA while SCL is High)
- **/
-static void ixgbe_i2c_stop(struct ixgbe_hw *hw)
-{
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
-
- /* Stop condition must begin with data low and clock high */
- ixgbe_set_i2c_data(hw, &i2cctl, 0);
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
- /* Setup time for stop condition (4us) */
- udelay(IXGBE_I2C_T_SU_STO);
-
- ixgbe_set_i2c_data(hw, &i2cctl, 1);
-
- /* bus free time between stop and start (4.7us)*/
- udelay(IXGBE_I2C_T_BUF);
-}
-
-/**
- * ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C
- * @hw: pointer to hardware structure
- * @data: pointer to the byte being clocked in
- *
- * Clocks in one byte data via I2C data/clock
- **/
-static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
-{
- s32 i;
- bool bit = 0;
-
- for (i = 7; i >= 0; i--) {
- ixgbe_clock_in_i2c_bit(hw, &bit);
- *data |= bit << i;
- }
-
- return 0;
-}
-
-/**
- * ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C
- * @hw: pointer to hardware structure
- * @data: data byte clocked out
- *
- * Clocks out one byte data via I2C data/clock
- **/
-static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
-{
- s32 status = 0;
- s32 i;
- u32 i2cctl;
- bool bit = 0;
-
- for (i = 7; i >= 0; i--) {
- bit = (data >> i) & 0x1;
- status = ixgbe_clock_out_i2c_bit(hw, bit);
-
- if (status != 0)
- break;
- }
-
- /* Release SDA line (set high) */
- i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- i2cctl |= IXGBE_I2C_DATA_OUT;
- IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl);
- IXGBE_WRITE_FLUSH(hw);
-
- return status;
-}
-
-/**
- * ixgbe_get_i2c_ack - Polls for I2C ACK
- * @hw: pointer to hardware structure
- *
- * Polls the SDA line for the slave ACK after a byte has been clocked out
- **/
-static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u32 i = 0;
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- u32 timeout = 10;
- bool ack = 1;
-
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
-
- /* Minimum high period of clock is 4us */
- udelay(IXGBE_I2C_T_HIGH);
-
-	/* Poll for ACK.  Note that ACK in the I2C spec is a
-	 * transition from 1 to 0 */
- for (i = 0; i < timeout; i++) {
- i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- ack = ixgbe_get_i2c_data(&i2cctl);
-
- udelay(1);
- if (ack == 0)
- break;
- }
-
- if (ack == 1) {
- hw_dbg(hw, "I2C ack was not received.\n");
- status = IXGBE_ERR_I2C;
- }
-
- ixgbe_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- udelay(IXGBE_I2C_T_LOW);
-
- return status;
-}
-
-/**
- * ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
- * @hw: pointer to hardware structure
- * @data: read data value
- *
- * Clocks in one bit via I2C data/clock
- **/
-static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
-{
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
-
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
- /* Minimum high period of clock is 4us */
- udelay(IXGBE_I2C_T_HIGH);
-
- i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- *data = ixgbe_get_i2c_data(&i2cctl);
-
- ixgbe_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us */
- udelay(IXGBE_I2C_T_LOW);
-
- return 0;
-}
-
-/**
- * ixgbe_clock_out_i2c_bit - Clocks out one bit via I2C data/clock
- * @hw: pointer to hardware structure
- * @data: data value to write
- *
- * Clocks out one bit via I2C data/clock
- **/
-static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
-{
- s32 status;
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
-
- status = ixgbe_set_i2c_data(hw, &i2cctl, data);
- if (status == 0) {
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
- /* Minimum high period of clock is 4us */
- udelay(IXGBE_I2C_T_HIGH);
-
- ixgbe_lower_i2c_clk(hw, &i2cctl);
-
- /* Minimum low period of clock is 4.7 us.
- * This also takes care of the data hold time.
- */
- udelay(IXGBE_I2C_T_LOW);
- } else {
- status = IXGBE_ERR_I2C;
- hw_dbg(hw, "I2C data was not set to %X\n", data);
- }
-
- return status;
-}
-
-/**
- * ixgbe_raise_i2c_clk - Raises the I2C SCL clock
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- *
- * Raises the I2C clock line '0'->'1'
- **/
-static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
-{
- u32 i = 0;
- u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT;
- u32 i2cctl_r = 0;
-
- for (i = 0; i < timeout; i++) {
- *i2cctl |= IXGBE_I2C_CLK_OUT;
-
- IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
- IXGBE_WRITE_FLUSH(hw);
- /* SCL rise time (1000ns) */
- udelay(IXGBE_I2C_T_RISE);
-
- i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- if (i2cctl_r & IXGBE_I2C_CLK_IN)
- break;
- }
-}
-
-/**
- * ixgbe_lower_i2c_clk - Lowers the I2C SCL clock
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- *
- * Lowers the I2C clock line '1'->'0'
- **/
-static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
-{
-
- *i2cctl &= ~IXGBE_I2C_CLK_OUT;
-
- IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* SCL fall time (300ns) */
- udelay(IXGBE_I2C_T_FALL);
-}
-
-/**
- * ixgbe_set_i2c_data - Sets the I2C data bit
- * @hw: pointer to hardware structure
- * @i2cctl: Current value of I2CCTL register
- * @data: I2C data value (0 or 1) to set
- *
- * Sets the I2C data bit
- **/
-static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
-{
- s32 status = 0;
-
- if (data)
- *i2cctl |= IXGBE_I2C_DATA_OUT;
- else
- *i2cctl &= ~IXGBE_I2C_DATA_OUT;
-
- IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
- udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA);
-
- /* Verify data was set correctly */
- *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- if (data != ixgbe_get_i2c_data(i2cctl)) {
- status = IXGBE_ERR_I2C;
- hw_dbg(hw, "Error - I2C data was not set to %X.\n", data);
- }
-
- return status;
-}
-
-/**
- * ixgbe_get_i2c_data - Reads the I2C SDA data bit
- * @i2cctl: Current value of I2CCTL register
- *
- * Returns the I2C data bit value
- **/
-static bool ixgbe_get_i2c_data(u32 *i2cctl)
-{
- bool data;
-
- if (*i2cctl & IXGBE_I2C_DATA_IN)
- data = 1;
- else
- data = 0;
-
- return data;
-}
-
-/**
- * ixgbe_i2c_bus_clear - Clears the I2C bus
- * @hw: pointer to hardware structure
- *
- * Clears the I2C bus by sending nine clock pulses.
- * Used when data line is stuck low.
- **/
-void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw)
-{
- u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
- u32 i;
-
- ixgbe_i2c_start(hw);
-
- ixgbe_set_i2c_data(hw, &i2cctl, 1);
-
- for (i = 0; i < 9; i++) {
- ixgbe_raise_i2c_clk(hw, &i2cctl);
-
- /* Min high period of clock is 4us */
- udelay(IXGBE_I2C_T_HIGH);
-
- ixgbe_lower_i2c_clk(hw, &i2cctl);
-
- /* Min low period of clock is 4.7us*/
- udelay(IXGBE_I2C_T_LOW);
- }
-
- ixgbe_i2c_start(hw);
-
- /* Put the i2c bus back to default state */
- ixgbe_i2c_stop(hw);
-}
-
-/**
- * ixgbe_tn_check_overtemp - Checks if an overtemp occurred.
- * @hw: pointer to hardware structure
- *
- * Checks if the LASI temp alarm status was triggered due to overtemp
- **/
-s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw)
-{
- s32 status = 0;
- u16 phy_data = 0;
-
- if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM)
- goto out;
-
- /* Check that the LASI temp alarm status was triggered */
- hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data);
-
- if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM))
- goto out;
-
- status = IXGBE_ERR_OVERTEMP;
-out:
- return status;
-}
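A self-contained sketch of the vendor OUI packing performed by ixgbe_identify_sfp_module_generic() above: the three SFF OUI bytes read from the module EEPROM are combined into one 32-bit value in the 0x[byte0][byte1][byte2][00] format and compared against the IXGBE_SFF_VENDOR_OUI_* constants. The shift and Intel OUI values mirror the deleted ixgbe_phy.h; the EEPROM bytes below are example data only:

#include <stdint.h>
#include <stdio.h>

#define SFF_VENDOR_OUI_BYTE0_SHIFT	24
#define SFF_VENDOR_OUI_BYTE1_SHIFT	16
#define SFF_VENDOR_OUI_BYTE2_SHIFT	8
#define SFF_VENDOR_OUI_INTEL		0x001B2100u

int main(void)
{
	/* example bytes as read from SFF EEPROM offsets 0x25-0x27 */
	uint8_t oui_bytes[3] = { 0x00, 0x1B, 0x21 };
	uint32_t vendor_oui =
		((uint32_t)oui_bytes[0] << SFF_VENDOR_OUI_BYTE0_SHIFT) |
		((uint32_t)oui_bytes[1] << SFF_VENDOR_OUI_BYTE1_SHIFT) |
		((uint32_t)oui_bytes[2] << SFF_VENDOR_OUI_BYTE2_SHIFT);

	printf("vendor OUI = 0x%08X -> %s\n", vendor_oui,
	       vendor_oui == SFF_VENDOR_OUI_INTEL ? "Intel" : "other vendor");
	return 0;
}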
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
deleted file mode 100644
index 6baa9acb..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_PHY_H_
-#define _IXGBE_PHY_H_
-
-#include "ixgbe_type.h"
-#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0
-
-/* EEPROM byte offsets */
-#define IXGBE_SFF_IDENTIFIER 0x0
-#define IXGBE_SFF_IDENTIFIER_SFP 0x3
-#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25
-#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26
-#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27
-#define IXGBE_SFF_1GBE_COMP_CODES 0x6
-#define IXGBE_SFF_10GBE_COMP_CODES 0x3
-#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8
-#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C
-
-/* Bitmasks */
-#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4
-#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8
-#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4
-#define IXGBE_SFF_1GBASESX_CAPABLE 0x1
-#define IXGBE_SFF_1GBASELX_CAPABLE 0x2
-#define IXGBE_SFF_1GBASET_CAPABLE 0x8
-#define IXGBE_SFF_10GBASESR_CAPABLE 0x10
-#define IXGBE_SFF_10GBASELR_CAPABLE 0x20
-#define IXGBE_I2C_EEPROM_READ_MASK 0x100
-#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3
-#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0
-#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1
-#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2
-#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3
-
-/* Flow control defines */
-#define IXGBE_TAF_SYM_PAUSE 0x400
-#define IXGBE_TAF_ASM_PAUSE 0x800
-
-/* Bit-shift macros */
-#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24
-#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16
-#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8
-
-/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
-#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600
-#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500
-#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00
-#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100
-
-/* I2C SDA and SCL timing parameters for standard mode */
-#define IXGBE_I2C_T_HD_STA 4
-#define IXGBE_I2C_T_LOW 5
-#define IXGBE_I2C_T_HIGH 4
-#define IXGBE_I2C_T_SU_STA 5
-#define IXGBE_I2C_T_HD_DATA 5
-#define IXGBE_I2C_T_SU_DATA 1
-#define IXGBE_I2C_T_RISE 1
-#define IXGBE_I2C_T_FALL 1
-#define IXGBE_I2C_T_SU_STO 4
-#define IXGBE_I2C_T_BUF 5
-
-#define IXGBE_TN_LASI_STATUS_REG 0x9005
-#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008
-
-s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw);
-bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr);
-enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
-s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
-s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
-s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
-s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
- u32 device_type, u16 *phy_data);
-s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
- u32 device_type, u16 phy_data);
-s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
-s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
- ixgbe_link_speed speed,
- bool autoneg,
- bool autoneg_wait_to_complete);
-s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *autoneg);
-
-/* PHY specific */
-s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *link_up);
-s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
-s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
- u16 *firmware_version);
-s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
- u16 *firmware_version);
-
-s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
-s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
-s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
-s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
-s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
- u16 *list_offset,
- u16 *data_offset);
-s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
-s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data);
-s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data);
-s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 *eeprom_data);
-s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 eeprom_data);
-void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw);
-#endif /* _IXGBE_PHY_H_ */
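The MDIO helpers declared above (ixgbe_read_phy_reg_generic()/ixgbe_write_phy_reg_generic()) build each MSCA command word by packing the register address, MMD device type, PHY address and command bits, then poll for the MDI command bit to clear. A rough sketch of the packing step follows; the shift and bit values here are illustrative assumptions, not copied from ixgbe_type.h:

#include <stdint.h>
#include <stdio.h>

/* illustrative field layout (assumed, not taken from ixgbe_type.h) */
#define MSCA_NP_ADDR_SHIFT	0
#define MSCA_DEV_TYPE_SHIFT	16
#define MSCA_PHY_ADDR_SHIFT	21
#define MSCA_ADDR_CYCLE		0x04000000u	/* address cycle command */
#define MSCA_MDI_COMMAND	0x40000000u	/* start the MDIO transaction */

static uint32_t msca_addr_cycle(uint16_t reg_addr, uint8_t dev_type, uint8_t phy_addr)
{
	return ((uint32_t)reg_addr << MSCA_NP_ADDR_SHIFT) |
	       ((uint32_t)dev_type << MSCA_DEV_TYPE_SHIFT) |
	       ((uint32_t)phy_addr << MSCA_PHY_ADDR_SHIFT) |
	       (MSCA_ADDR_CYCLE | MSCA_MDI_COMMAND);
}

int main(void)
{
	/* PHY_XS control register (0x0000) on MMD 4, PHY address 0 */
	printf("MSCA = 0x%08X\n", msca_addr_cycle(0x0000, 4, 0));
	return 0;
}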
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
deleted file mode 100644
index 0689590e..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
+++ /dev/null
@@ -1,3239 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_TYPE_H_
-#define _IXGBE_TYPE_H_
-
-#include "ixgbe_osdep.h"
-
-
-/* Vendor ID */
-#define IXGBE_INTEL_VENDOR_ID 0x8086
-
-/* Device IDs */
-#define IXGBE_DEV_ID_82598 0x10B6
-#define IXGBE_DEV_ID_82598_BX 0x1508
-#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6
-#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7
-#define IXGBE_DEV_ID_82598AT 0x10C8
-#define IXGBE_DEV_ID_82598AT2 0x150B
-#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB
-#define IXGBE_DEV_ID_82598EB_CX4 0x10DD
-#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC
-#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1
-#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1
-#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4
-#define IXGBE_DEV_ID_82599_KX4 0x10F7
-#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514
-#define IXGBE_DEV_ID_82599_KR 0x1517
-#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
-#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C
-#define IXGBE_DEV_ID_82599_CX4 0x10F9
-#define IXGBE_DEV_ID_82599_SFP 0x10FB
-#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9
-#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0
-#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A
-#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529
-#define IXGBE_DEV_ID_82599_SFP_EM 0x1507
-#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D
-#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558
-#define IXGBE_DEV_ID_82599EN_SFP 0x1557
-#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC
-#define IXGBE_DEV_ID_82599_T3_LOM 0x151C
-#define IXGBE_DEV_ID_82599_LS 0x154F
-#define IXGBE_DEV_ID_X540T 0x1528
-
-/* General Registers */
-#define IXGBE_CTRL 0x00000
-#define IXGBE_STATUS 0x00008
-#define IXGBE_CTRL_EXT 0x00018
-#define IXGBE_ESDP 0x00020
-#define IXGBE_EODSDP 0x00028
-#define IXGBE_I2CCTL 0x00028
-#define IXGBE_PHY_GPIO 0x00028
-#define IXGBE_MAC_GPIO 0x00030
-#define IXGBE_PHYINT_STATUS0 0x00100
-#define IXGBE_PHYINT_STATUS1 0x00104
-#define IXGBE_PHYINT_STATUS2 0x00108
-#define IXGBE_LEDCTL 0x00200
-#define IXGBE_FRTIMER 0x00048
-#define IXGBE_TCPTIMER 0x0004C
-#define IXGBE_CORESPARE 0x00600
-#define IXGBE_EXVET 0x05078
-
-/* NVM Registers */
-#define IXGBE_EEC 0x10010
-#define IXGBE_EERD 0x10014
-#define IXGBE_EEWR 0x10018
-#define IXGBE_FLA 0x1001C
-#define IXGBE_EEMNGCTL 0x10110
-#define IXGBE_EEMNGDATA 0x10114
-#define IXGBE_FLMNGCTL 0x10118
-#define IXGBE_FLMNGDATA 0x1011C
-#define IXGBE_FLMNGCNT 0x10120
-#define IXGBE_FLOP 0x1013C
-#define IXGBE_GRC 0x10200
-#define IXGBE_SRAMREL 0x10210
-#define IXGBE_PHYDBG 0x10218
-
-/* General Receive Control */
-#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */
-#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */
-
-#define IXGBE_VPDDIAG0 0x10204
-#define IXGBE_VPDDIAG1 0x10208
-
-/* I2CCTL Bit Masks */
-#define IXGBE_I2C_CLK_IN 0x00000001
-#define IXGBE_I2C_CLK_OUT 0x00000002
-#define IXGBE_I2C_DATA_IN 0x00000004
-#define IXGBE_I2C_DATA_OUT 0x00000008
-#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500
-
-#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8
-#define IXGBE_EMC_INTERNAL_DATA 0x00
-#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20
-#define IXGBE_EMC_DIODE1_DATA 0x01
-#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19
-#define IXGBE_EMC_DIODE2_DATA 0x23
-#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A
-
-#define IXGBE_MAX_SENSORS 3
-
-struct ixgbe_thermal_diode_data {
- u8 location;
- u8 temp;
- u8 caution_thresh;
- u8 max_op_thresh;
-};
-
-struct ixgbe_thermal_sensor_data {
- struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS];
-};
-
-/* Interrupt Registers */
-#define IXGBE_EICR 0x00800
-#define IXGBE_EICS 0x00808
-#define IXGBE_EIMS 0x00880
-#define IXGBE_EIMC 0x00888
-#define IXGBE_EIAC 0x00810
-#define IXGBE_EIAM 0x00890
-#define IXGBE_EICS_EX(_i) (0x00A90 + (_i) * 4)
-#define IXGBE_EIMS_EX(_i) (0x00AA0 + (_i) * 4)
-#define IXGBE_EIMC_EX(_i) (0x00AB0 + (_i) * 4)
-#define IXGBE_EIAM_EX(_i) (0x00AD0 + (_i) * 4)
-/* 82599 EITR is only 12 bits, with the lower 3 always zero */
-/*
- * 82598 EITR is 16 bits but set the limits based on the max
- * supported by all ixgbe hardware
- */
-#define IXGBE_MAX_INT_RATE 488281
-#define IXGBE_MIN_INT_RATE 956
-#define IXGBE_MAX_EITR 0x00000FF8
-#define IXGBE_MIN_EITR 8
-#define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \
- (0x012300 + (((_i) - 24) * 4)))
-#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8
-#define IXGBE_EITR_LLI_MOD 0x00008000
-#define IXGBE_EITR_CNT_WDIS 0x80000000
-#define IXGBE_IVAR(_i) (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */
-#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */
-#define IXGBE_EITRSEL 0x00894
-#define IXGBE_MSIXT 0x00000 /* MSI-X Table. 0x0000 - 0x01C */
-#define IXGBE_MSIXPBA 0x02000 /* MSI-X Pending bit array */
-#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4)))
-#define IXGBE_GPIE 0x00898
-
-/* Flow Control Registers */
-#define IXGBE_FCADBUL 0x03210
-#define IXGBE_FCADBUH 0x03214
-#define IXGBE_FCAMACL 0x04328
-#define IXGBE_FCAMACH 0x0432C
-#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_PFCTOP 0x03008
-#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */
-#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */
-#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */
-#define IXGBE_FCRTV 0x032A0
-#define IXGBE_FCCFG 0x03D00
-#define IXGBE_TFCS 0x0CE00
-
-/* Receive DMA Registers */
-#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
- (0x0D000 + (((_i) - 64) * 0x40)))
-#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
- (0x0D004 + (((_i) - 64) * 0x40)))
-#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
- (0x0D008 + (((_i) - 64) * 0x40)))
-#define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
- (0x0D010 + (((_i) - 64) * 0x40)))
-#define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
- (0x0D018 + (((_i) - 64) * 0x40)))
-#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \
- (0x0D028 + (((_i) - 64) * 0x40)))
-#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \
- (0x0D02C + (((_i) - 64) * 0x40)))
-#define IXGBE_RSCDBU 0x03028
-#define IXGBE_RDDCC 0x02F20
-#define IXGBE_RXMEMWRAP 0x03190
-#define IXGBE_STARCTRL 0x03024
-/*
- * Split and Replication Receive Control Registers
- * 00-15 : 0x02100 + n*4
- * 16-63 : 0x01014 + n*0x40
- * 64-127: 0x0D014 + (n-64)*0x40
- */
-#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \
- (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
- (0x0D014 + (((_i) - 64) * 0x40))))
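The IXGBE_SRRCTL(_i) macro above encodes the three address ranges listed in the comment; as a sketch, the same piecewise mapping written out as a function (queue index in, MMIO offset out). It mirrors the macro exactly and the function name is purely illustrative.

#include <stdint.h>

/* Illustrative only: the piecewise SRRCTL offset computation from the
 * comment above, written out as a function. Mirrors IXGBE_SRRCTL(_i). */
static uint32_t ixgbe_srrctl_offset(unsigned int i)
{
	if (i <= 15)
		return 0x02100 + i * 4;           /* queues 0-15   */
	if (i < 64)
		return 0x01014 + i * 0x40;        /* queues 16-63  */
	return 0x0D014 + (i - 64) * 0x40;         /* queues 64-127 */
}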
-/*
- * Rx DCA Control Register:
- * 00-15 : 0x02200 + n*4
- * 16-63 : 0x0100C + n*0x40
- * 64-127: 0x0D00C + (n-64)*0x40
- */
-#define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \
- (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
- (0x0D00C + (((_i) - 64) * 0x40))))
-#define IXGBE_RDRXCTL 0x02F00
-#define IXGBE_RDRXCTL_RSC_PUSH 0x80
-/* 8 of these 0x03C00 - 0x03C1C */
-#define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4))
-#define IXGBE_RXCTRL 0x03000
-#define IXGBE_DROPEN 0x03D04
-#define IXGBE_RXPBSIZE_SHIFT 10
-
-/* Receive Registers */
-#define IXGBE_RXCSUM 0x05000
-#define IXGBE_RFCTL 0x05008
-#define IXGBE_DRECCCTL 0x02F08
-#define IXGBE_DRECCCTL_DISABLE 0
-#define IXGBE_DRECCCTL2 0x02F8C
-
-/* Multicast Table Array - 128 entries */
-#define IXGBE_MTA(_i) (0x05200 + ((_i) * 4))
-#define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
- (0x0A200 + ((_i) * 8)))
-#define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
- (0x0A204 + ((_i) * 8)))
-#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8))
-#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8))
-/* Packet split receive type */
-#define IXGBE_PSRTYPE(_i) (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \
- (0x0EA00 + ((_i) * 4)))
-/* array of 4096 1-bit vlan filters */
-#define IXGBE_VFTA(_i) (0x0A000 + ((_i) * 4))
-/* array of 4096 4-bit vlan vmdq indices */
-#define IXGBE_VFTAVIND(_j, _i) (0x0A200 + ((_j) * 0x200) + ((_i) * 4))
-#define IXGBE_FCTRL 0x05080
-#define IXGBE_VLNCTRL 0x05088
-#define IXGBE_MCSTCTRL 0x05090
-#define IXGBE_MRQC 0x05818
-#define IXGBE_SAQF(_i) (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */
-#define IXGBE_DAQF(_i) (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */
-#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */
-#define IXGBE_FTQF(_i) (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */
-#define IXGBE_ETQF(_i) (0x05128 + ((_i) * 4)) /* EType Queue Filter */
-#define IXGBE_ETQS(_i) (0x0EC00 + ((_i) * 4)) /* EType Queue Select */
-#define IXGBE_SYNQF 0x0EC30 /* SYN Packet Queue Filter */
-#define IXGBE_RQTC 0x0EC70
-#define IXGBE_MTQC 0x08120
-#define IXGBE_VLVF(_i) (0x0F100 + ((_i) * 4)) /* 64 of these (0-63) */
-#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4)) /* 128 of these (0-127) */
-#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4)) /* 64 of these (0-63) */
-#define IXGBE_VT_CTL 0x051B0
-#define IXGBE_PFMAILBOX(_i) (0x04B00 + (4 * (_i))) /* 64 total */
-/* 64 Mailboxes, 16 DW each */
-#define IXGBE_PFMBMEM(_i) (0x13000 + (64 * (_i)))
-#define IXGBE_PFMBICR(_i) (0x00710 + (4 * (_i))) /* 4 total */
-#define IXGBE_PFMBIMR(_i) (0x00720 + (4 * (_i))) /* 4 total */
-#define IXGBE_VFRE(_i) (0x051E0 + ((_i) * 4))
-#define IXGBE_VFTE(_i) (0x08110 + ((_i) * 4))
-#define IXGBE_VMECM(_i) (0x08790 + ((_i) * 4))
-#define IXGBE_QDE 0x2F04
-#define IXGBE_VMTXSW(_i) (0x05180 + ((_i) * 4)) /* 2 total */
-#define IXGBE_VMOLR(_i) (0x0F000 + ((_i) * 4)) /* 64 total */
-#define IXGBE_UTA(_i) (0x0F400 + ((_i) * 4))
-#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4))
-#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4))
-#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4))
-#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
-#define IXGBE_RXFECCERR0 0x051B8
-#define IXGBE_LLITHRESH 0x0EC90
-#define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_IMIRVP 0x05AC0
-#define IXGBE_VMD_CTL 0x0581C
-#define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */
-#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */
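The register counts above (32 RETA, 10 RSSRK) suggest the usual RSS layout: a 40-byte hash key spread over the RSSRK registers and 128 redirection-table entries packed four 8-bit queue indices per RETA dword. A compile-only sketch under that assumption follows; the wr32() MMIO stub and the function name are placeholders, not part of this header.

#include <stdint.h>
#include <string.h>

#define IXGBE_RETA(_i)  (0x05C00 + ((_i) * 4))  /* 32 of these (0-31) */
#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4))  /* 10 of these (0-9)  */

static void wr32(uint32_t reg, uint32_t val) { (void)reg; (void)val; } /* MMIO stub */

/* Fill the 40-byte RSS key and the 128-entry redirection table. */
static void ixgbe_rss_program(const uint8_t key[40], const uint8_t reta[128])
{
	uint32_t i, dw;

	for (i = 0; i < 10; i++) {
		memcpy(&dw, &key[i * 4], 4);      /* one key dword per RSSRK register */
		wr32(IXGBE_RSSRK(i), dw);
	}
	for (i = 0; i < 32; i++) {
		dw = (uint32_t)reta[i * 4] |
		     ((uint32_t)reta[i * 4 + 1] << 8) |
		     ((uint32_t)reta[i * 4 + 2] << 16) |
		     ((uint32_t)reta[i * 4 + 3] << 24);
		wr32(IXGBE_RETA(i), dw);          /* four 8-bit queue indices per dword */
	}
}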
-
-/* Flow Director registers */
-#define IXGBE_FDIRCTRL 0x0EE00
-#define IXGBE_FDIRHKEY 0x0EE68
-#define IXGBE_FDIRSKEY 0x0EE6C
-#define IXGBE_FDIRDIP4M 0x0EE3C
-#define IXGBE_FDIRSIP4M 0x0EE40
-#define IXGBE_FDIRTCPM 0x0EE44
-#define IXGBE_FDIRUDPM 0x0EE48
-#define IXGBE_FDIRIP6M 0x0EE74
-#define IXGBE_FDIRM 0x0EE70
-
-/* Flow Director Stats registers */
-#define IXGBE_FDIRFREE 0x0EE38
-#define IXGBE_FDIRLEN 0x0EE4C
-#define IXGBE_FDIRUSTAT 0x0EE50
-#define IXGBE_FDIRFSTAT 0x0EE54
-#define IXGBE_FDIRMATCH 0x0EE58
-#define IXGBE_FDIRMISS 0x0EE5C
-
-/* Flow Director Programming registers */
-#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */
-#define IXGBE_FDIRIPSA 0x0EE18
-#define IXGBE_FDIRIPDA 0x0EE1C
-#define IXGBE_FDIRPORT 0x0EE20
-#define IXGBE_FDIRVLAN 0x0EE24
-#define IXGBE_FDIRHASH 0x0EE28
-#define IXGBE_FDIRCMD 0x0EE2C
-
-/* Transmit DMA registers */
-#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/
-#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40))
-#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40))
-#define IXGBE_TDH(_i) (0x06010 + ((_i) * 0x40))
-#define IXGBE_TDT(_i) (0x06018 + ((_i) * 0x40))
-#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40))
-#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40))
-#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40))
-#define IXGBE_DTXCTL 0x07E00
-
-#define IXGBE_DMATXCTL 0x04A80
-#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */
-#define IXGBE_PFDTXGSWC 0x08220
-#define IXGBE_DTXMXSZRQ 0x08100
-#define IXGBE_DTXTCPFLGL 0x04A88
-#define IXGBE_DTXTCPFLGH 0x04A8C
-#define IXGBE_LBDRPEN 0x0CA00
-#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */
-
-#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */
-#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */
-#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */
-#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */
-
-#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */
-
-/* Anti-spoofing defines */
-#define IXGBE_SPOOF_MACAS_MASK 0xFF
-#define IXGBE_SPOOF_VLANAS_MASK 0xFF00
-#define IXGBE_SPOOF_VLANAS_SHIFT 8
-#define IXGBE_PFVFSPOOF_REG_COUNT 8
-/* 16 of these (0-15) */
-#define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4))
-/* Tx DCA Control register : 128 of these (0-127) */
-#define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40))
-#define IXGBE_TIPG 0x0CB00
-#define IXGBE_TXPBSIZE(_i) (0x0CC00 + ((_i) * 4)) /* 8 of these */
-#define IXGBE_MNGTXMAP 0x0CD10
-#define IXGBE_TIPG_FIBER_DEFAULT 3
-#define IXGBE_TXPBSIZE_SHIFT 10
-
-/* Wake up registers */
-#define IXGBE_WUC 0x05800
-#define IXGBE_WUFC 0x05808
-#define IXGBE_WUS 0x05810
-#define IXGBE_IPAV 0x05838
-#define IXGBE_IP4AT 0x05840 /* IPv4 table 0x5840-0x5858 */
-#define IXGBE_IP6AT 0x05880 /* IPv6 table 0x5880-0x588F */
-
-#define IXGBE_WUPL 0x05900
-#define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */
-#define IXGBE_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Flex host filter table */
-/* Ext Flexible Host Filter Table */
-#define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100))
-
-#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4
-#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2
-
-/* Each Flexible Filter is at most 128 (0x80) bytes in length */
-#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX 128
-#define IXGBE_FHFT_LENGTH_OFFSET 0xFC /* Length byte in FHFT */
-#define IXGBE_FHFT_LENGTH_MASK 0x0FF /* Length in lower byte */
-
-/* Definitions for power management and wakeup registers */
-/* Wake Up Control */
-#define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */
-#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */
-#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N pin assertion */
-
-/* Wake Up Filter Control */
-#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
-#define IXGBE_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
-#define IXGBE_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
-#define IXGBE_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
-#define IXGBE_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
-#define IXGBE_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
-#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
-#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
-#define IXGBE_WUFC_MNG 0x00000100 /* Directed Mgmt Packet Wakeup Enable */
-
-#define IXGBE_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */
-#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
-#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
-#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
-#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */
-#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */
-#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */
-#define IXGBE_WUFC_FLX_FILTERS 0x000F0000 /* Mask for 4 flex filters */
-/* Mask for Ext. flex filters */
-#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000
-#define IXGBE_WUFC_ALL_FILTERS 0x003F00FF /* Mask for all wakeup filters */
-#define IXGBE_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */
-
-/* Wake Up Status */
-#define IXGBE_WUS_LNKC IXGBE_WUFC_LNKC
-#define IXGBE_WUS_MAG IXGBE_WUFC_MAG
-#define IXGBE_WUS_EX IXGBE_WUFC_EX
-#define IXGBE_WUS_MC IXGBE_WUFC_MC
-#define IXGBE_WUS_BC IXGBE_WUFC_BC
-#define IXGBE_WUS_ARP IXGBE_WUFC_ARP
-#define IXGBE_WUS_IPV4 IXGBE_WUFC_IPV4
-#define IXGBE_WUS_IPV6 IXGBE_WUFC_IPV6
-#define IXGBE_WUS_MNG IXGBE_WUFC_MNG
-#define IXGBE_WUS_FLX0 IXGBE_WUFC_FLX0
-#define IXGBE_WUS_FLX1 IXGBE_WUFC_FLX1
-#define IXGBE_WUS_FLX2 IXGBE_WUFC_FLX2
-#define IXGBE_WUS_FLX3 IXGBE_WUFC_FLX3
-#define IXGBE_WUS_FLX4 IXGBE_WUFC_FLX4
-#define IXGBE_WUS_FLX5 IXGBE_WUFC_FLX5
-#define IXGBE_WUS_FLX_FILTERS IXGBE_WUFC_FLX_FILTERS
-
-/* Wake Up Packet Length */
-#define IXGBE_WUPL_LENGTH_MASK 0xFFFF
-
-/* DCB registers */
-#define IXGBE_DCB_MAX_TRAFFIC_CLASS 8
-#define IXGBE_RMCS 0x03D00
-#define IXGBE_DPMCS 0x07F40
-#define IXGBE_PDPMCS 0x0CD00
-#define IXGBE_RUPPBMR 0x050A0
-#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_TDTQ2TCCR(_i) (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */
-#define IXGBE_TDTQ2TCSR(_i) (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */
-#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
-
-
-/* Security Control Registers */
-#define IXGBE_SECTXCTRL 0x08800
-#define IXGBE_SECTXSTAT 0x08804
-#define IXGBE_SECTXBUFFAF 0x08808
-#define IXGBE_SECTXMINIFG 0x08810
-#define IXGBE_SECRXCTRL 0x08D00
-#define IXGBE_SECRXSTAT 0x08D04
-
-/* Security Bit Fields and Masks */
-#define IXGBE_SECTXCTRL_SECTX_DIS 0x00000001
-#define IXGBE_SECTXCTRL_TX_DIS 0x00000002
-#define IXGBE_SECTXCTRL_STORE_FORWARD 0x00000004
-
-#define IXGBE_SECTXSTAT_SECTX_RDY 0x00000001
-#define IXGBE_SECTXSTAT_ECC_TXERR 0x00000002
-
-#define IXGBE_SECRXCTRL_SECRX_DIS 0x00000001
-#define IXGBE_SECRXCTRL_RX_DIS 0x00000002
-
-#define IXGBE_SECRXSTAT_SECRX_RDY 0x00000001
-#define IXGBE_SECRXSTAT_ECC_RXERR 0x00000002
-
-/* LinkSec (MacSec) Registers */
-#define IXGBE_LSECTXCAP 0x08A00
-#define IXGBE_LSECRXCAP 0x08F00
-#define IXGBE_LSECTXCTRL 0x08A04
-#define IXGBE_LSECTXSCL 0x08A08 /* SCI Low */
-#define IXGBE_LSECTXSCH 0x08A0C /* SCI High */
-#define IXGBE_LSECTXSA 0x08A10
-#define IXGBE_LSECTXPN0 0x08A14
-#define IXGBE_LSECTXPN1 0x08A18
-#define IXGBE_LSECTXKEY0(_n) (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */
-#define IXGBE_LSECTXKEY1(_n) (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */
-#define IXGBE_LSECRXCTRL 0x08F04
-#define IXGBE_LSECRXSCL 0x08F08
-#define IXGBE_LSECRXSCH 0x08F0C
-#define IXGBE_LSECRXSA(_i) (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */
-#define IXGBE_LSECRXPN(_i) (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */
-#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m))))
-#define IXGBE_LSECTXUT 0x08A3C /* OutPktsUntagged */
-#define IXGBE_LSECTXPKTE 0x08A40 /* OutPktsEncrypted */
-#define IXGBE_LSECTXPKTP 0x08A44 /* OutPktsProtected */
-#define IXGBE_LSECTXOCTE 0x08A48 /* OutOctetsEncrypted */
-#define IXGBE_LSECTXOCTP 0x08A4C /* OutOctetsProtected */
-#define IXGBE_LSECRXUT 0x08F40 /* InPktsUntagged/InPktsNoTag */
-#define IXGBE_LSECRXOCTD 0x08F44 /* InOctetsDecrypted */
-#define IXGBE_LSECRXOCTV 0x08F48 /* InOctetsValidated */
-#define IXGBE_LSECRXBAD 0x08F4C /* InPktsBadTag */
-#define IXGBE_LSECRXNOSCI 0x08F50 /* InPktsNoSci */
-#define IXGBE_LSECRXUNSCI 0x08F54 /* InPktsUnknownSci */
-#define IXGBE_LSECRXUNCH 0x08F58 /* InPktsUnchecked */
-#define IXGBE_LSECRXDELAY 0x08F5C /* InPktsDelayed */
-#define IXGBE_LSECRXLATE 0x08F60 /* InPktsLate */
-#define IXGBE_LSECRXOK(_n) (0x08F64 + (0x04 * (_n))) /* InPktsOk */
-#define IXGBE_LSECRXINV(_n) (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */
-#define IXGBE_LSECRXNV(_n) (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */
-#define IXGBE_LSECRXUNSA 0x08F7C /* InPktsUnusedSa */
-#define IXGBE_LSECRXNUSA 0x08F80 /* InPktsNotUsingSa */
-
-/* LinkSec (MacSec) Bit Fields and Masks */
-#define IXGBE_LSECTXCAP_SUM_MASK 0x00FF0000
-#define IXGBE_LSECTXCAP_SUM_SHIFT 16
-#define IXGBE_LSECRXCAP_SUM_MASK 0x00FF0000
-#define IXGBE_LSECRXCAP_SUM_SHIFT 16
-
-#define IXGBE_LSECTXCTRL_EN_MASK 0x00000003
-#define IXGBE_LSECTXCTRL_DISABLE 0x0
-#define IXGBE_LSECTXCTRL_AUTH 0x1
-#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT 0x2
-#define IXGBE_LSECTXCTRL_AISCI 0x00000020
-#define IXGBE_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00
-#define IXGBE_LSECTXCTRL_RSV_MASK 0x000000D8
-
-#define IXGBE_LSECRXCTRL_EN_MASK 0x0000000C
-#define IXGBE_LSECRXCTRL_EN_SHIFT 2
-#define IXGBE_LSECRXCTRL_DISABLE 0x0
-#define IXGBE_LSECRXCTRL_CHECK 0x1
-#define IXGBE_LSECRXCTRL_STRICT 0x2
-#define IXGBE_LSECRXCTRL_DROP 0x3
-#define IXGBE_LSECRXCTRL_PLSH 0x00000040
-#define IXGBE_LSECRXCTRL_RP 0x00000080
-#define IXGBE_LSECRXCTRL_RSV_MASK 0xFFFFFF33
-
-/* IpSec Registers */
-#define IXGBE_IPSTXIDX 0x08900
-#define IXGBE_IPSTXSALT 0x08904
-#define IXGBE_IPSTXKEY(_i) (0x08908 + (4 * (_i))) /* 4 of these (0-3) */
-#define IXGBE_IPSRXIDX 0x08E00
-#define IXGBE_IPSRXIPADDR(_i) (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */
-#define IXGBE_IPSRXSPI 0x08E14
-#define IXGBE_IPSRXIPIDX 0x08E18
-#define IXGBE_IPSRXKEY(_i) (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */
-#define IXGBE_IPSRXSALT 0x08E2C
-#define IXGBE_IPSRXMOD 0x08E30
-
-#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE 0x4
-
-/* DCB registers */
-#define IXGBE_RTRPCS 0x02430
-#define IXGBE_RTTDCS 0x04900
-#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */
-#define IXGBE_RTTPCS 0x0CD00
-#define IXGBE_RTRUP2TC 0x03020
-#define IXGBE_RTTUP2TC 0x0C800
-#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_TXLLQ(_i) (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */
-#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_RTTDQSEL 0x04904
-#define IXGBE_RTTDT1C 0x04908
-#define IXGBE_RTTDT1S 0x0490C
-#define IXGBE_RTTDTECC 0x04990
-#define IXGBE_RTTDTECC_NO_BCN 0x00000100
-
-#define IXGBE_RTTBCNRC 0x04984
-#define IXGBE_RTTBCNRC_RS_ENA 0x80000000
-#define IXGBE_RTTBCNRC_RF_DEC_MASK 0x00003FFF
-#define IXGBE_RTTBCNRC_RF_INT_SHIFT 14
-#define IXGBE_RTTBCNRC_RF_INT_MASK \
- (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)
-#define IXGBE_RTTBCNRM 0x04980
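The RTTBCNRC masks above describe a fixed-point rate factor: 14 fractional bits (RF_DEC), the integer part above them (RF_INT), and an enable bit (RS_ENA). A sketch of how these fields combine for a per-queue Tx rate limit, modeled on the base-driver pattern of selecting the queue through RTTDQSEL first; the wr32() stub and helper name are assumptions, not part of this header.

#include <stdint.h>

#define IXGBE_RTTDQSEL                 0x04904
#define IXGBE_RTTBCNRC                 0x04984
#define IXGBE_RTTBCNRC_RS_ENA          0x80000000
#define IXGBE_RTTBCNRC_RF_DEC_MASK     0x00003FFF
#define IXGBE_RTTBCNRC_RF_INT_SHIFT    14
#define IXGBE_RTTBCNRC_RF_INT_MASK \
	(IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)

static void wr32(uint32_t reg, uint32_t val) { (void)reg; (void)val; } /* MMIO stub */

/* Rate factor = link_speed / tx_rate as Q14 fixed point, enable bit on top. */
static void ixgbe_set_queue_rate(uint32_t queue, uint32_t link_mbps, uint32_t rate_mbps)
{
	uint32_t rf = (uint32_t)(((uint64_t)link_mbps << IXGBE_RTTBCNRC_RF_INT_SHIFT) /
				 rate_mbps);
	uint32_t val = IXGBE_RTTBCNRC_RS_ENA |
		       (rf & (IXGBE_RTTBCNRC_RF_INT_MASK | IXGBE_RTTBCNRC_RF_DEC_MASK));

	wr32(IXGBE_RTTDQSEL, queue);   /* select which queue the BCN registers address */
	wr32(IXGBE_RTTBCNRC, val);
}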
-
-/* FCoE DMA Context Registers */
-#define IXGBE_FCPTRL 0x02410 /* FC User Desc. PTR Low */
-#define IXGBE_FCPTRH 0x02414 /* FC User Desc. PTR High */
-#define IXGBE_FCBUFF 0x02418 /* FC Buffer Control */
-#define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */
-#define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0*/
-#define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4))
-#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */
-#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */
-#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */
-#define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */
-#define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */
-#define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3
-#define IXGBE_FCBUFF_BUFFCNT_SHIFT 8
-#define IXGBE_FCBUFF_OFFSET_SHIFT 16
-#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */
-#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */
-#define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */
-#define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */
-#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16
-/* FCoE SOF/EOF */
-#define IXGBE_TEOFF 0x04A94 /* Tx FC EOF */
-#define IXGBE_TSOFF 0x04A98 /* Tx FC SOF */
-#define IXGBE_REOFF 0x05158 /* Rx FC EOF */
-#define IXGBE_RSOFF 0x051F8 /* Rx FC SOF */
-/* FCoE Filter Context Registers */
-#define IXGBE_FCFLT 0x05108 /* FC FLT Context */
-#define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */
-#define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */
-#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */
-#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */
-#define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */
-#define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */
-#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */
-#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */
-#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */
-/* FCoE Receive Control */
-#define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */
-#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */
-#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */
-#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */
-#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */
-#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */
-#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. Header */
-#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */
-#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */
-#define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */
-#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8
-/* FCoE Redirection */
-#define IXGBE_FCRECTL 0x0ED00 /* FC Redirection Control */
-#define IXGBE_FCRETA0 0x0ED10 /* FC Redirection Table 0 */
-#define IXGBE_FCRETA(_i) (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */
-#define IXGBE_FCRECTL_ENA 0x1 /* FCoE Redir Table Enable */
-#define IXGBE_FCRETASEL_ENA 0x2 /* FCoE FCRETASEL bit */
-#define IXGBE_FCRETA_SIZE 8 /* Max entries in FCRETA */
-#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */
-
-/* Stats registers */
-#define IXGBE_CRCERRS 0x04000
-#define IXGBE_ILLERRC 0x04004
-#define IXGBE_ERRBC 0x04008
-#define IXGBE_MSPDC 0x04010
-#define IXGBE_MPC(_i) (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/
-#define IXGBE_MLFC 0x04034
-#define IXGBE_MRFC 0x04038
-#define IXGBE_RLEC 0x04040
-#define IXGBE_LXONTXC 0x03F60
-#define IXGBE_LXONRXC 0x0CF60
-#define IXGBE_LXOFFTXC 0x03F68
-#define IXGBE_LXOFFRXC 0x0CF68
-#define IXGBE_LXONRXCNT 0x041A4
-#define IXGBE_LXOFFRXCNT 0x041A8
-#define IXGBE_PXONRXCNT(_i) (0x04140 + ((_i) * 4)) /* 8 of these */
-#define IXGBE_PXOFFRXCNT(_i) (0x04160 + ((_i) * 4)) /* 8 of these */
-#define IXGBE_PXON2OFFCNT(_i) (0x03240 + ((_i) * 4)) /* 8 of these */
-#define IXGBE_PXONTXC(_i) (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/
-#define IXGBE_PXONRXC(_i) (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/
-#define IXGBE_PXOFFTXC(_i) (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/
-#define IXGBE_PXOFFRXC(_i) (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/
-#define IXGBE_PRC64 0x0405C
-#define IXGBE_PRC127 0x04060
-#define IXGBE_PRC255 0x04064
-#define IXGBE_PRC511 0x04068
-#define IXGBE_PRC1023 0x0406C
-#define IXGBE_PRC1522 0x04070
-#define IXGBE_GPRC 0x04074
-#define IXGBE_BPRC 0x04078
-#define IXGBE_MPRC 0x0407C
-#define IXGBE_GPTC 0x04080
-#define IXGBE_GORCL 0x04088
-#define IXGBE_GORCH 0x0408C
-#define IXGBE_GOTCL 0x04090
-#define IXGBE_GOTCH 0x04094
-#define IXGBE_RNBC(_i) (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/
-#define IXGBE_RUC 0x040A4
-#define IXGBE_RFC 0x040A8
-#define IXGBE_ROC 0x040AC
-#define IXGBE_RJC 0x040B0
-#define IXGBE_MNGPRC 0x040B4
-#define IXGBE_MNGPDC 0x040B8
-#define IXGBE_MNGPTC 0x0CF90
-#define IXGBE_TORL 0x040C0
-#define IXGBE_TORH 0x040C4
-#define IXGBE_TPR 0x040D0
-#define IXGBE_TPT 0x040D4
-#define IXGBE_PTC64 0x040D8
-#define IXGBE_PTC127 0x040DC
-#define IXGBE_PTC255 0x040E0
-#define IXGBE_PTC511 0x040E4
-#define IXGBE_PTC1023 0x040E8
-#define IXGBE_PTC1522 0x040EC
-#define IXGBE_MPTC 0x040F0
-#define IXGBE_BPTC 0x040F4
-#define IXGBE_XEC 0x04120
-#define IXGBE_SSVPC 0x08780
-
-#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4))
-#define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \
- (0x08600 + ((_i) * 4)))
-#define IXGBE_TQSM(_i) (0x08600 + ((_i) * 4))
-
-#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */
-#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */
-#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */
-#define IXGBE_FCCRC 0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */
-#define IXGBE_FCOERPDC 0x0241C /* FCoE Rx Packets Dropped Count */
-#define IXGBE_FCLAST 0x02424 /* FCoE Last Error Count */
-#define IXGBE_FCOEPRC 0x02428 /* Number of FCoE Packets Received */
-#define IXGBE_FCOEDWRC 0x0242C /* Number of FCoE DWords Received */
-#define IXGBE_FCOEPTC 0x08784 /* Number of FCoE Packets Transmitted */
-#define IXGBE_FCOEDWTC 0x08788 /* Number of FCoE DWords Transmitted */
-#define IXGBE_FCCRC_CNT_MASK 0x0000FFFF /* CRC_CNT: bit 0 - 15 */
-#define IXGBE_FCLAST_CNT_MASK 0x0000FFFF /* Last_CNT: bit 0 - 15 */
-#define IXGBE_O2BGPTC 0x041C4
-#define IXGBE_O2BSPC 0x087B0
-#define IXGBE_B2OSPC 0x041C0
-#define IXGBE_B2OGPRC 0x02F90
-#define IXGBE_BUPRC 0x04180
-#define IXGBE_BMPRC 0x04184
-#define IXGBE_BBPRC 0x04188
-#define IXGBE_BUPTC 0x0418C
-#define IXGBE_BMPTC 0x04190
-#define IXGBE_BBPTC 0x04194
-#define IXGBE_BCRCERRS 0x04198
-#define IXGBE_BXONRXC 0x0419C
-#define IXGBE_BXOFFRXC 0x041E0
-#define IXGBE_BXONTXC 0x041E4
-#define IXGBE_BXOFFTXC 0x041E8
-#define IXGBE_PCRC8ECL 0x0E810
-#define IXGBE_PCRC8ECH 0x0E811
-#define IXGBE_PCRC8ECH_MASK 0x1F
-#define IXGBE_LDPCECL 0x0E820
-#define IXGBE_LDPCECH 0x0E821
-
-/* Management */
-#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_MANC 0x05820
-#define IXGBE_MFVAL 0x05824
-#define IXGBE_MANC2H 0x05860
-#define IXGBE_MDEF(_i) (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_MIPAF 0x058B0
-#define IXGBE_MMAL(_i) (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */
-#define IXGBE_MMAH(_i) (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */
-#define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */
-#define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */
-#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_LSWFW 0x15014
-#define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */
-#define IXGBE_BMCIPVAL 0x05060
-#define IXGBE_BMCIP_IPADDR_TYPE 0x00000001
-#define IXGBE_BMCIP_IPADDR_VALID 0x00000002
-
-/* Management Bit Fields and Masks */
-#define IXGBE_MANC_EN_BMC2OS 0x10000000 /* Ena BMC2OS and OS2BMC traffic */
-#define IXGBE_MANC_EN_BMC2OS_SHIFT 28
-
-/* Firmware Semaphore Register */
-#define IXGBE_FWSM_MODE_MASK 0xE
-
-/* ARC Subsystem registers */
-#define IXGBE_HICR 0x15F00
-#define IXGBE_FWSTS 0x15F0C
-#define IXGBE_HSMC0R 0x15F04
-#define IXGBE_HSMC1R 0x15F08
-#define IXGBE_SWSR 0x15F10
-#define IXGBE_HFDR 0x15FE8
-#define IXGBE_FLEX_MNG 0x15800 /* 0x15800 - 0x15EFC */
-
-#define IXGBE_HICR_EN 0x01 /* Enable bit - RO */
-/* Driver sets this bit when done to put command in RAM */
-#define IXGBE_HICR_C 0x02
-#define IXGBE_HICR_SV 0x04 /* Status Validity */
-#define IXGBE_HICR_FW_RESET_ENABLE 0x40
-#define IXGBE_HICR_FW_RESET 0x80
-
-/* PCI-E registers */
-#define IXGBE_GCR 0x11000
-#define IXGBE_GTV 0x11004
-#define IXGBE_FUNCTAG 0x11008
-#define IXGBE_GLT 0x1100C
-#define IXGBE_PCIEPIPEADR 0x11004
-#define IXGBE_PCIEPIPEDAT 0x11008
-#define IXGBE_GSCL_1 0x11010
-#define IXGBE_GSCL_2 0x11014
-#define IXGBE_GSCL_3 0x11018
-#define IXGBE_GSCL_4 0x1101C
-#define IXGBE_GSCN_0 0x11020
-#define IXGBE_GSCN_1 0x11024
-#define IXGBE_GSCN_2 0x11028
-#define IXGBE_GSCN_3 0x1102C
-#define IXGBE_FACTPS 0x10150
-#define IXGBE_PCIEANACTL 0x11040
-#define IXGBE_SWSM 0x10140
-#define IXGBE_FWSM 0x10148
-#define IXGBE_GSSR 0x10160
-#define IXGBE_MREVID 0x11064
-#define IXGBE_DCA_ID 0x11070
-#define IXGBE_DCA_CTRL 0x11074
-#define IXGBE_SWFW_SYNC IXGBE_GSSR
-
-/* PCI-E registers 82599-Specific */
-#define IXGBE_GCR_EXT 0x11050
-#define IXGBE_GSCL_5_82599 0x11030
-#define IXGBE_GSCL_6_82599 0x11034
-#define IXGBE_GSCL_7_82599 0x11038
-#define IXGBE_GSCL_8_82599 0x1103C
-#define IXGBE_PHYADR_82599 0x11040
-#define IXGBE_PHYDAT_82599 0x11044
-#define IXGBE_PHYCTL_82599 0x11048
-#define IXGBE_PBACLR_82599 0x11068
-#define IXGBE_CIAA_82599 0x11088
-#define IXGBE_CIAD_82599 0x1108C
-#define IXGBE_PICAUSE 0x110B0
-#define IXGBE_PIENA 0x110B8
-#define IXGBE_CDQ_MBR_82599 0x110B4
-#define IXGBE_PCIESPARE 0x110BC
-#define IXGBE_MISC_REG_82599 0x110F0
-#define IXGBE_ECC_CTRL_0_82599 0x11100
-#define IXGBE_ECC_CTRL_1_82599 0x11104
-#define IXGBE_ECC_STATUS_82599 0x110E0
-#define IXGBE_BAR_CTRL_82599 0x110F4
-
-/* PCI Express Control */
-#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000
-#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000
-#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000
-#define IXGBE_GCR_CAP_VER2 0x00040000
-
-#define IXGBE_GCR_EXT_MSIX_EN 0x80000000
-#define IXGBE_GCR_EXT_BUFFERS_CLEAR 0x40000000
-#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001
-#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002
-#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003
-#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \
- IXGBE_GCR_EXT_VT_MODE_64)
-/* Time Sync Registers */
-#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */
-#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */
-#define IXGBE_RXSTMPL 0x051E8 /* Rx timestamp Low - RO */
-#define IXGBE_RXSTMPH 0x051A4 /* Rx timestamp High - RO */
-#define IXGBE_RXSATRL 0x051A0 /* Rx timestamp attribute low - RO */
-#define IXGBE_RXSATRH 0x051A8 /* Rx timestamp attribute high - RO */
-#define IXGBE_RXMTRL 0x05120 /* RX message type register low - RW */
-#define IXGBE_TXSTMPL 0x08C04 /* Tx timestamp value Low - RO */
-#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */
-#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */
-#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */
-#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */
-#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */
-#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */
-#define IXGBE_TSAUXC 0x08C20 /* TimeSync Auxiliary Control register - RW */
-#define IXGBE_TRGTTIML0 0x08C24 /* Target Time Register 0 Low - RW */
-#define IXGBE_TRGTTIMH0 0x08C28 /* Target Time Register 0 High - RW */
-#define IXGBE_TRGTTIML1 0x08C2C /* Target Time Register 1 Low - RW */
-#define IXGBE_TRGTTIMH1 0x08C30 /* Target Time Register 1 High - RW */
-#define IXGBE_FREQOUT0 0x08C34 /* Frequency Out 0 Control register - RW */
-#define IXGBE_FREQOUT1 0x08C38 /* Frequency Out 1 Control register - RW */
-#define IXGBE_AUXSTMPL0 0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */
-#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
-#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
-#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
-
-/* Diagnostic Registers */
-#define IXGBE_RDSTATCTL 0x02C20
-#define IXGBE_RDSTAT(_i) (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */
-#define IXGBE_RDHMPN 0x02F08
-#define IXGBE_RIC_DW(_i) (0x02F10 + ((_i) * 4))
-#define IXGBE_RDPROBE 0x02F20
-#define IXGBE_RDMAM 0x02F30
-#define IXGBE_RDMAD 0x02F34
-#define IXGBE_TDSTATCTL 0x07C20
-#define IXGBE_TDSTAT(_i) (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */
-#define IXGBE_TDHMPN 0x07F08
-#define IXGBE_TDHMPN2 0x082FC
-#define IXGBE_TXDESCIC 0x082CC
-#define IXGBE_TIC_DW(_i) (0x07F10 + ((_i) * 4))
-#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4))
-#define IXGBE_TDPROBE 0x07F20
-#define IXGBE_TXBUFCTRL 0x0C600
-#define IXGBE_TXBUFDATA0 0x0C610
-#define IXGBE_TXBUFDATA1 0x0C614
-#define IXGBE_TXBUFDATA2 0x0C618
-#define IXGBE_TXBUFDATA3 0x0C61C
-#define IXGBE_RXBUFCTRL 0x03600
-#define IXGBE_RXBUFDATA0 0x03610
-#define IXGBE_RXBUFDATA1 0x03614
-#define IXGBE_RXBUFDATA2 0x03618
-#define IXGBE_RXBUFDATA3 0x0361C
-#define IXGBE_PCIE_DIAG(_i) (0x11090 + ((_i) * 4)) /* 8 of these */
-#define IXGBE_RFVAL 0x050A4
-#define IXGBE_MDFTC1 0x042B8
-#define IXGBE_MDFTC2 0x042C0
-#define IXGBE_MDFTFIFO1 0x042C4
-#define IXGBE_MDFTFIFO2 0x042C8
-#define IXGBE_MDFTS 0x042CC
-#define IXGBE_RXDATAWRPTR(_i) (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/
-#define IXGBE_RXDESCWRPTR(_i) (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/
-#define IXGBE_RXDATARDPTR(_i) (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/
-#define IXGBE_RXDESCRDPTR(_i) (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/
-#define IXGBE_TXDATAWRPTR(_i) (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/
-#define IXGBE_TXDESCWRPTR(_i) (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/
-#define IXGBE_TXDATARDPTR(_i) (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/
-#define IXGBE_TXDESCRDPTR(_i) (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/
-#define IXGBE_PCIEECCCTL 0x1106C
-#define IXGBE_RXWRPTR(_i) (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/
-#define IXGBE_RXUSED(_i) (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/
-#define IXGBE_RXRDPTR(_i) (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/
-#define IXGBE_RXRDWRPTR(_i) (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/
-#define IXGBE_TXWRPTR(_i) (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/
-#define IXGBE_TXUSED(_i) (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/
-#define IXGBE_TXRDPTR(_i) (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/
-#define IXGBE_TXRDWRPTR(_i) (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/
-#define IXGBE_PCIEECCCTL0 0x11100
-#define IXGBE_PCIEECCCTL1 0x11104
-#define IXGBE_RXDBUECC 0x03F70
-#define IXGBE_TXDBUECC 0x0CF70
-#define IXGBE_RXDBUEST 0x03F74
-#define IXGBE_TXDBUEST 0x0CF74
-#define IXGBE_PBTXECC 0x0C300
-#define IXGBE_PBRXECC 0x03300
-#define IXGBE_GHECCR 0x110B0
-
-/* MAC Registers */
-#define IXGBE_PCS1GCFIG 0x04200
-#define IXGBE_PCS1GLCTL 0x04208
-#define IXGBE_PCS1GLSTA 0x0420C
-#define IXGBE_PCS1GDBG0 0x04210
-#define IXGBE_PCS1GDBG1 0x04214
-#define IXGBE_PCS1GANA 0x04218
-#define IXGBE_PCS1GANLP 0x0421C
-#define IXGBE_PCS1GANNP 0x04220
-#define IXGBE_PCS1GANLPNP 0x04224
-#define IXGBE_HLREG0 0x04240
-#define IXGBE_HLREG1 0x04244
-#define IXGBE_PAP 0x04248
-#define IXGBE_MACA 0x0424C
-#define IXGBE_APAE 0x04250
-#define IXGBE_ARD 0x04254
-#define IXGBE_AIS 0x04258
-#define IXGBE_MSCA 0x0425C
-#define IXGBE_MSRWD 0x04260
-#define IXGBE_MLADD 0x04264
-#define IXGBE_MHADD 0x04268
-#define IXGBE_MAXFRS 0x04268
-#define IXGBE_TREG 0x0426C
-#define IXGBE_PCSS1 0x04288
-#define IXGBE_PCSS2 0x0428C
-#define IXGBE_XPCSS 0x04290
-#define IXGBE_MFLCN 0x04294
-#define IXGBE_SERDESC 0x04298
-#define IXGBE_MACS 0x0429C
-#define IXGBE_AUTOC 0x042A0
-#define IXGBE_LINKS 0x042A4
-#define IXGBE_LINKS2 0x04324
-#define IXGBE_AUTOC2 0x042A8
-#define IXGBE_AUTOC3 0x042AC
-#define IXGBE_ANLP1 0x042B0
-#define IXGBE_ANLP2 0x042B4
-#define IXGBE_MACC 0x04330
-#define IXGBE_ATLASCTL 0x04800
-#define IXGBE_MMNGC 0x042D0
-#define IXGBE_ANLPNP1 0x042D4
-#define IXGBE_ANLPNP2 0x042D8
-#define IXGBE_KRPCSFC 0x042E0
-#define IXGBE_KRPCSS 0x042E4
-#define IXGBE_FECS1 0x042E8
-#define IXGBE_FECS2 0x042EC
-#define IXGBE_SMADARCTL 0x14F10
-#define IXGBE_MPVC 0x04318
-#define IXGBE_SGMIIC 0x04314
-
-/* Statistics Registers */
-#define IXGBE_RXNFGPC 0x041B0
-#define IXGBE_RXNFGBCL 0x041B4
-#define IXGBE_RXNFGBCH 0x041B8
-#define IXGBE_RXDGPC 0x02F50
-#define IXGBE_RXDGBCL 0x02F54
-#define IXGBE_RXDGBCH 0x02F58
-#define IXGBE_RXDDGPC 0x02F5C
-#define IXGBE_RXDDGBCL 0x02F60
-#define IXGBE_RXDDGBCH 0x02F64
-#define IXGBE_RXLPBKGPC 0x02F68
-#define IXGBE_RXLPBKGBCL 0x02F6C
-#define IXGBE_RXLPBKGBCH 0x02F70
-#define IXGBE_RXDLPBKGPC 0x02F74
-#define IXGBE_RXDLPBKGBCL 0x02F78
-#define IXGBE_RXDLPBKGBCH 0x02F7C
-#define IXGBE_TXDGPC 0x087A0
-#define IXGBE_TXDGBCL 0x087A4
-#define IXGBE_TXDGBCH 0x087A8
-
-#define IXGBE_RXDSTATCTRL 0x02F40
-
-/* Copper Pond 2 link timeout */
-#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50
-
-/* Omer CORECTL */
-#define IXGBE_CORECTL 0x014F00
-/* BARCTRL */
-#define IXGBE_BARCTRL 0x110F4
-#define IXGBE_BARCTRL_FLSIZE 0x0700
-#define IXGBE_BARCTRL_FLSIZE_SHIFT 8
-#define IXGBE_BARCTRL_CSRSIZE 0x2000
-
-/* RSCCTL Bit Masks */
-#define IXGBE_RSCCTL_RSCEN 0x01
-#define IXGBE_RSCCTL_MAXDESC_1 0x00
-#define IXGBE_RSCCTL_MAXDESC_4 0x04
-#define IXGBE_RSCCTL_MAXDESC_8 0x08
-#define IXGBE_RSCCTL_MAXDESC_16 0x0C
-
-/* RSCDBU Bit Masks */
-#define IXGBE_RSCDBU_RSCSMALDIS_MASK 0x0000007F
-#define IXGBE_RSCDBU_RSCACKDIS 0x00000080
-
-/* RDRXCTL Bit Masks */
-#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 /* Rx Desc Min THLD Size */
-#define IXGBE_RDRXCTL_CRCSTRIP 0x00000002 /* CRC Strip */
-#define IXGBE_RDRXCTL_MVMEN 0x00000020
-#define IXGBE_RDRXCTL_DMAIDONE 0x00000008 /* DMA init cycle done */
-#define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */
-#define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */
-#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disable RSC compl on LLI */
-#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC ena */
-#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC ena */
-
-/* RQTC Bit Masks and Shifts */
-#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4)
-#define IXGBE_RQTC_TC0_MASK (0x7 << 0)
-#define IXGBE_RQTC_TC1_MASK (0x7 << 4)
-#define IXGBE_RQTC_TC2_MASK (0x7 << 8)
-#define IXGBE_RQTC_TC3_MASK (0x7 << 12)
-#define IXGBE_RQTC_TC4_MASK (0x7 << 16)
-#define IXGBE_RQTC_TC5_MASK (0x7 << 20)
-#define IXGBE_RQTC_TC6_MASK (0x7 << 24)
-#define IXGBE_RQTC_TC7_MASK (0x7 << 28)
-
-/* PSRTYPE.RQPL Bit masks and shift */
-#define IXGBE_PSRTYPE_RQPL_MASK 0x7
-#define IXGBE_PSRTYPE_RQPL_SHIFT 29
-
-/* CTRL Bit Masks */
-#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */
-#define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */
-#define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */
-#define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST)
-
-/* FACTPS */
-#define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */
-
-/* MHADD Bit Masks */
-#define IXGBE_MHADD_MFS_MASK 0xFFFF0000
-#define IXGBE_MHADD_MFS_SHIFT 16
-
-/* Extended Device Control */
-#define IXGBE_CTRL_EXT_PFRSTD 0x00004000 /* Physical Function Reset Done */
-#define IXGBE_CTRL_EXT_NS_DIS 0x00010000 /* No Snoop disable */
-#define IXGBE_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */
-#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */
-
-/* Direct Cache Access (DCA) definitions */
-#define IXGBE_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */
-#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */
-
-#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */
-#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */
-
-#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
-#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */
-#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */
-#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */
-#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */
-#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */
-#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */
-#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */
-#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */
-
-#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
-#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */
-#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */
-#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */
-#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */
-#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */
-#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */
-#define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */
-
-/* MSCA Bit Masks */
-#define IXGBE_MSCA_NP_ADDR_MASK 0x0000FFFF /* MDI Addr (new prot) */
-#define IXGBE_MSCA_NP_ADDR_SHIFT 0
-#define IXGBE_MSCA_DEV_TYPE_MASK 0x001F0000 /* Dev Type (new prot) */
-#define IXGBE_MSCA_DEV_TYPE_SHIFT 16 /* Register Address (old prot) */
-#define IXGBE_MSCA_PHY_ADDR_MASK 0x03E00000 /* PHY Address mask */
-#define IXGBE_MSCA_PHY_ADDR_SHIFT 21 /* PHY Address shift*/
-#define IXGBE_MSCA_OP_CODE_MASK 0x0C000000 /* OP CODE mask */
-#define IXGBE_MSCA_OP_CODE_SHIFT 26 /* OP CODE shift */
-#define IXGBE_MSCA_ADDR_CYCLE 0x00000000 /* OP CODE 00 (addr cycle) */
-#define IXGBE_MSCA_WRITE 0x04000000 /* OP CODE 01 (wr) */
-#define IXGBE_MSCA_READ 0x0C000000 /* OP CODE 11 (rd) */
-#define IXGBE_MSCA_READ_AUTOINC 0x08000000 /* OP CODE 10 (rd auto inc)*/
-#define IXGBE_MSCA_ST_CODE_MASK 0x30000000 /* ST Code mask */
-#define IXGBE_MSCA_ST_CODE_SHIFT 28 /* ST Code shift */
-#define IXGBE_MSCA_NEW_PROTOCOL 0x00000000 /* ST CODE 00 (new prot) */
-#define IXGBE_MSCA_OLD_PROTOCOL 0x10000000 /* ST CODE 01 (old prot) */
-#define IXGBE_MSCA_MDI_COMMAND 0x40000000 /* Initiate MDI command */
-#define IXGBE_MSCA_MDI_IN_PROG_EN 0x80000000 /* MDI in progress ena */
-
-/* MSRWD bit masks */
-#define IXGBE_MSRWD_WRITE_DATA_MASK 0x0000FFFF
-#define IXGBE_MSRWD_WRITE_DATA_SHIFT 0
-#define IXGBE_MSRWD_READ_DATA_MASK 0xFFFF0000
-#define IXGBE_MSRWD_READ_DATA_SHIFT 16
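Taken together, the MSCA and MSRWD fields above describe how a "new protocol" (clause 45) MDIO access is built: register address in the low 16 bits, device type and PHY address in their fields, an op code, and MDI_COMMAND to start the transaction; read data then comes back in the upper half of MSRWD. The sketch below follows the two-step address-cycle/read-cycle sequence used by the base drivers; the rd32()/wr32() MMIO stubs and the helper name are assumptions, and real code bounds the completion polls with a timeout rather than a bare loop.

#include <stdint.h>

#define IXGBE_MSCA                   0x0425C
#define IXGBE_MSRWD                  0x04260
#define IXGBE_MSCA_DEV_TYPE_SHIFT    16
#define IXGBE_MSCA_PHY_ADDR_SHIFT    21
#define IXGBE_MSCA_ADDR_CYCLE        0x00000000
#define IXGBE_MSCA_READ              0x0C000000
#define IXGBE_MSCA_MDI_COMMAND       0x40000000
#define IXGBE_MSRWD_READ_DATA_SHIFT  16

static void wr32(uint32_t reg, uint32_t val) { (void)reg; (void)val; } /* MMIO stub */
static uint32_t rd32(uint32_t reg) { (void)reg; return 0; }            /* MMIO stub */

static uint16_t ixgbe_mdio_read(uint32_t phy_addr, uint32_t dev_type, uint16_t reg_addr)
{
	uint32_t base = (uint32_t)reg_addr |
			(dev_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
			(phy_addr << IXGBE_MSCA_PHY_ADDR_SHIFT);

	/* address cycle, then read cycle; each completes when MDI_COMMAND clears */
	wr32(IXGBE_MSCA, base | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND);
	while (rd32(IXGBE_MSCA) & IXGBE_MSCA_MDI_COMMAND)
		;
	wr32(IXGBE_MSCA, base | IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND);
	while (rd32(IXGBE_MSCA) & IXGBE_MSCA_MDI_COMMAND)
		;
	return (uint16_t)(rd32(IXGBE_MSRWD) >> IXGBE_MSRWD_READ_DATA_SHIFT);
}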
-
-/* Atlas registers */
-#define IXGBE_ATLAS_PDN_LPBK 0x24
-#define IXGBE_ATLAS_PDN_10G 0xB
-#define IXGBE_ATLAS_PDN_1G 0xC
-#define IXGBE_ATLAS_PDN_AN 0xD
-
-/* Atlas bit masks */
-#define IXGBE_ATLASCTL_WRITE_CMD 0x00010000
-#define IXGBE_ATLAS_PDN_TX_REG_EN 0x10
-#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL 0xF0
-#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL 0xF0
-#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL 0xF0
-
-/* Omer bit masks */
-#define IXGBE_CORECTL_WRITE_CMD 0x00010000
-
-/* Device Type definitions for new protocol MDIO commands */
-#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1
-#define IXGBE_MDIO_PCS_DEV_TYPE 0x3
-#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4
-#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */
-#define IXGBE_TWINAX_DEV 1
-
-#define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */
-
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Ctrl Reg */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010
-
-#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */
-#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */
-#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */
-#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */
-#define IXGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */
-#define IXGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */
-#define IXGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/
-#define IXGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/
-#define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */
-#define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */
-#define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */
-#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */
-#define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */
-#define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */
-#define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */
-#define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */
-#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */
-
-#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */
-#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */
-#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */
-#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */
-
-/* MII clause 22/28 definitions */
-#define IXGBE_MDIO_PHY_LOW_POWER_MODE 0x0800
-
-#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */
-#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */
-#define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */
-#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */
-#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/
-#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/
-#define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/
-#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */
-#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */
-#define IXGBE_MII_RESTART 0x200
-#define IXGBE_MII_AUTONEG_COMPLETE 0x20
-#define IXGBE_MII_AUTONEG_LINK_UP 0x04
-#define IXGBE_MII_AUTONEG_REG 0x0
-
-#define IXGBE_PHY_REVISION_MASK 0xFFFFFFF0
-#define IXGBE_MAX_PHY_ADDR 32
-
-/* PHY IDs*/
-#define TN1010_PHY_ID 0x00A19410
-#define TNX_FW_REV 0xB
-#define X540_PHY_ID 0x01540200
-#define AQ_FW_REV 0x20
-#define QT2022_PHY_ID 0x0043A400
-#define ATH_PHY_ID 0x03429050
-
-/* PHY Types */
-#define IXGBE_M88E1145_E_PHY_ID 0x01410CD0
-
-/* Special PHY Init Routine */
-#define IXGBE_PHY_INIT_OFFSET_NL 0x002B
-#define IXGBE_PHY_INIT_END_NL 0xFFFF
-#define IXGBE_CONTROL_MASK_NL 0xF000
-#define IXGBE_DATA_MASK_NL 0x0FFF
-#define IXGBE_CONTROL_SHIFT_NL 12
-#define IXGBE_DELAY_NL 0
-#define IXGBE_DATA_NL 1
-#define IXGBE_CONTROL_NL 0x000F
-#define IXGBE_CONTROL_EOL_NL 0x0FFF
-#define IXGBE_CONTROL_SOL_NL 0x0000
-
-/* General purpose Interrupt Enable */
-#define IXGBE_SDP0_GPIEN 0x00000001 /* SDP0 */
-#define IXGBE_SDP1_GPIEN 0x00000002 /* SDP1 */
-#define IXGBE_SDP2_GPIEN 0x00000004 /* SDP2 */
-#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */
-#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */
-#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */
-#define IXGBE_GPIE_EIAME 0x40000000
-#define IXGBE_GPIE_PBA_SUPPORT 0x80000000
-#define IXGBE_GPIE_RSC_DELAY_SHIFT 11
-#define IXGBE_GPIE_VTMODE_MASK 0x0000C000 /* VT Mode Mask */
-#define IXGBE_GPIE_VTMODE_16 0x00004000 /* 16 VFs 8 queues per VF */
-#define IXGBE_GPIE_VTMODE_32 0x00008000 /* 32 VFs 4 queues per VF */
-#define IXGBE_GPIE_VTMODE_64 0x0000C000 /* 64 VFs 2 queues per VF */
-
-/* Packet Buffer Initialization */
-#define IXGBE_MAX_PACKET_BUFFERS 8
-
-#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */
-#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */
-#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */
-#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */
-#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */
-#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */
-#define IXGBE_RXPBSIZE_MAX 0x00080000 /* 512KB Packet Buffer */
-#define IXGBE_TXPBSIZE_MAX 0x00028000 /* 160KB Packet Buffer */
-
-#define IXGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */
-#define IXGBE_MAX_PB 8
-
-/* Packet buffer allocation strategies */
-enum {
- PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */
-#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL
- PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */
-#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED
-};
-
-/* Transmit Flow Control status */
-#define IXGBE_TFCS_TXOFF 0x00000001
-#define IXGBE_TFCS_TXOFF0 0x00000100
-#define IXGBE_TFCS_TXOFF1 0x00000200
-#define IXGBE_TFCS_TXOFF2 0x00000400
-#define IXGBE_TFCS_TXOFF3 0x00000800
-#define IXGBE_TFCS_TXOFF4 0x00001000
-#define IXGBE_TFCS_TXOFF5 0x00002000
-#define IXGBE_TFCS_TXOFF6 0x00004000
-#define IXGBE_TFCS_TXOFF7 0x00008000
-
-/* TCP Timer */
-#define IXGBE_TCPTIMER_KS 0x00000100
-#define IXGBE_TCPTIMER_COUNT_ENABLE 0x00000200
-#define IXGBE_TCPTIMER_COUNT_FINISH 0x00000400
-#define IXGBE_TCPTIMER_LOOP 0x00000800
-#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF
-
-/* HLREG0 Bit Masks */
-#define IXGBE_HLREG0_TXCRCEN 0x00000001 /* bit 0 */
-#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 /* bit 1 */
-#define IXGBE_HLREG0_JUMBOEN 0x00000004 /* bit 2 */
-#define IXGBE_HLREG0_TXPADEN 0x00000400 /* bit 10 */
-#define IXGBE_HLREG0_TXPAUSEEN 0x00001000 /* bit 12 */
-#define IXGBE_HLREG0_RXPAUSEEN 0x00004000 /* bit 14 */
-#define IXGBE_HLREG0_LPBK 0x00008000 /* bit 15 */
-#define IXGBE_HLREG0_MDCSPD 0x00010000 /* bit 16 */
-#define IXGBE_HLREG0_CONTMDC 0x00020000 /* bit 17 */
-#define IXGBE_HLREG0_CTRLFLTR 0x00040000 /* bit 18 */
-#define IXGBE_HLREG0_PREPEND 0x00F00000 /* bits 20-23 */
-#define IXGBE_HLREG0_PRIPAUSEEN 0x01000000 /* bit 24 */
-#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000 /* bits 25-26 */
-#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000 /* bit 27 */
-#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000 /* bit 28 */
-
-/* VMD_CTL bitmasks */
-#define IXGBE_VMD_CTL_VMDQ_EN 0x00000001
-#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002
-
-/* VT_CTL bitmasks */
-#define IXGBE_VT_CTL_DIS_DEFPL 0x20000000 /* disable default pool */
-#define IXGBE_VT_CTL_REPLEN 0x40000000 /* replication enabled */
-#define IXGBE_VT_CTL_VT_ENABLE 0x00000001 /* Enable VT Mode */
-#define IXGBE_VT_CTL_POOL_SHIFT 7
-#define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT)
-
-/* VMOLR bitmasks */
-#define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */
-#define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */
-#define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */
-#define IXGBE_VMOLR_BAM 0x08000000 /* accept broadcast packets */
-#define IXGBE_VMOLR_MPE 0x10000000 /* multicast promiscuous */
-
-/* VFRE bitmask */
-#define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF
-
-#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */
-
-/* RDHMPN and TDHMPN bitmasks */
-#define IXGBE_RDHMPN_RDICADDR 0x007FF800
-#define IXGBE_RDHMPN_RDICRDREQ 0x00800000
-#define IXGBE_RDHMPN_RDICADDR_SHIFT 11
-#define IXGBE_TDHMPN_TDICADDR 0x003FF800
-#define IXGBE_TDHMPN_TDICRDREQ 0x00800000
-#define IXGBE_TDHMPN_TDICADDR_SHIFT 11
-
-#define IXGBE_RDMAM_MEM_SEL_SHIFT 13
-#define IXGBE_RDMAM_DWORD_SHIFT 9
-#define IXGBE_RDMAM_DESC_COMP_FIFO 1
-#define IXGBE_RDMAM_DFC_CMD_FIFO 2
-#define IXGBE_RDMAM_RSC_HEADER_ADDR 3
-#define IXGBE_RDMAM_TCN_STATUS_RAM 4
-#define IXGBE_RDMAM_WB_COLL_FIFO 5
-#define IXGBE_RDMAM_QSC_CNT_RAM 6
-#define IXGBE_RDMAM_QSC_FCOE_RAM 7
-#define IXGBE_RDMAM_QSC_QUEUE_CNT 8
-#define IXGBE_RDMAM_QSC_QUEUE_RAM 0xA
-#define IXGBE_RDMAM_QSC_RSC_RAM 0xB
-#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE 135
-#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT 4
-#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE 48
-#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT 7
-#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE 32
-#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT 4
-#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE 256
-#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT 9
-#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE 8
-#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT 4
-#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE 64
-#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT 4
-#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE 512
-#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT 5
-#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE 32
-#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT 4
-#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE 128
-#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT 8
-#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE 32
-#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT 8
-
-#define IXGBE_TXDESCIC_READY 0x80000000
-
-/* Receive Checksum Control */
-#define IXGBE_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
-#define IXGBE_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
-
-/* FCRTL Bit Masks */
-#define IXGBE_FCRTL_XONE 0x80000000 /* XON enable */
-#define IXGBE_FCRTH_FCEN 0x80000000 /* Packet buffer fc enable */
-
-/* PAP bit masks*/
-#define IXGBE_PAP_TXPAUSECNT_MASK 0x0000FFFF /* Pause counter mask */
-
-/* RMCS Bit Masks */
-#define IXGBE_RMCS_RRM 0x00000002 /* Rx Recycle Mode enable */
-/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
-#define IXGBE_RMCS_RAC 0x00000004
-/* Deficit Fixed Prio ena */
-#define IXGBE_RMCS_DFP IXGBE_RMCS_RAC
-#define IXGBE_RMCS_TFCE_802_3X 0x00000008 /* Tx Priority FC ena */
-#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 /* Tx Priority FC ena */
-#define IXGBE_RMCS_ARBDIS 0x00000040 /* Arbitration disable bit */
-
-/* FCCFG Bit Masks */
-#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 /* Tx link FC enable */
-#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 /* Tx priority FC enable */
-
-/* Interrupt register bitmasks */
-
-/* Extended Interrupt Cause Read */
-#define IXGBE_EICR_RTX_QUEUE 0x0000FFFF /* RTx Queue Interrupt */
-#define IXGBE_EICR_FLOW_DIR 0x00010000 /* FDir Exception */
-#define IXGBE_EICR_RX_MISS 0x00020000 /* Packet Buffer Overrun */
-#define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */
-#define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */
-#define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */
-#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */
-#define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */
-#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */
-#define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */
-#define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */
-#define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */
-#define IXGBE_EICR_ECC 0x10000000 /* ECC Error */
-#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */
-#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */
-#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */
-#define IXGBE_EICR_OTHER 0x80000000 /* Interrupt Cause Active */
-
-/* Extended Interrupt Cause Set */
-#define IXGBE_EICS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
-#define IXGBE_EICS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
-#define IXGBE_EICS_RX_MISS IXGBE_EICR_RX_MISS /* Pkt Buffer Overrun */
-#define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */
-#define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
-#define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */
-#define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
-#define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
-#define IXGBE_EICS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
-#define IXGBE_EICS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
-#define IXGBE_EICS_ECC IXGBE_EICR_ECC /* ECC Error */
-#define IXGBE_EICS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
-#define IXGBE_EICS_DHER IXGBE_EICR_DHER /* Desc Handler Error */
-#define IXGBE_EICS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
-#define IXGBE_EICS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
-
-/* Extended Interrupt Mask Set */
-#define IXGBE_EIMS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
-#define IXGBE_EIMS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
-#define IXGBE_EIMS_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
-#define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */
-#define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
-#define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */
-#define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
-#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */
-#define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
-#define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
-#define IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
-#define IXGBE_EIMS_ECC IXGBE_EICR_ECC /* ECC Error */
-#define IXGBE_EIMS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
-#define IXGBE_EIMS_DHER IXGBE_EICR_DHER /* Descr Handler Error */
-#define IXGBE_EIMS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
-#define IXGBE_EIMS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
-
-/* Extended Interrupt Mask Clear */
-#define IXGBE_EIMC_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
-#define IXGBE_EIMC_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */
-#define IXGBE_EIMC_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
-#define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */
-#define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
-#define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */
-#define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */
-#define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
-#define IXGBE_EIMC_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
-#define IXGBE_EIMC_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
-#define IXGBE_EIMC_ECC IXGBE_EICR_ECC /* ECC Error */
-#define IXGBE_EIMC_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
-#define IXGBE_EIMC_DHER IXGBE_EICR_DHER /* Desc Handler Err */
-#define IXGBE_EIMC_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */
-#define IXGBE_EIMC_OTHER IXGBE_EICR_OTHER /* INT Cause Active */
-
-#define IXGBE_EIMS_ENABLE_MASK ( \
- IXGBE_EIMS_RTX_QUEUE | \
- IXGBE_EIMS_LSC | \
- IXGBE_EIMS_TCP_TIMER | \
- IXGBE_EIMS_OTHER)
-
-/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
-#define IXGBE_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */
-#define IXGBE_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */
-#define IXGBE_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */
-#define IXGBE_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */
-#define IXGBE_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */
-#define IXGBE_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */
-#define IXGBE_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */
-#define IXGBE_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */
-#define IXGBE_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */
-#define IXGBE_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of control bits */
-#define IXGBE_IMIR_SIZE_BP_82599 0x00001000 /* Packet size bypass */
-#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */
-#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */
-#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */
-#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */
-#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */
-#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */
-#define IXGBE_IMIR_CTRL_BP_82599 0x00080000 /* Bypass chk of ctrl bits */
-#define IXGBE_IMIR_LLI_EN_82599 0x00100000 /* Enables low latency Int */
-#define IXGBE_IMIR_RX_QUEUE_MASK_82599 0x0000007F /* Rx Queue Mask */
-#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */
-#define IXGBE_IMIRVP_PRIORITY_MASK 0x00000007 /* VLAN priority mask */
-#define IXGBE_IMIRVP_PRIORITY_EN 0x00000008 /* VLAN priority enable */
-
-#define IXGBE_MAX_FTQF_FILTERS 128
-#define IXGBE_FTQF_PROTOCOL_MASK 0x00000003
-#define IXGBE_FTQF_PROTOCOL_TCP 0x00000000
-#define IXGBE_FTQF_PROTOCOL_UDP 0x00000001
-#define IXGBE_FTQF_PROTOCOL_SCTP 2
-#define IXGBE_FTQF_PRIORITY_MASK 0x00000007
-#define IXGBE_FTQF_PRIORITY_SHIFT 2
-#define IXGBE_FTQF_POOL_MASK 0x0000003F
-#define IXGBE_FTQF_POOL_SHIFT 8
-#define IXGBE_FTQF_5TUPLE_MASK_MASK 0x0000001F
-#define IXGBE_FTQF_5TUPLE_MASK_SHIFT 25
-#define IXGBE_FTQF_SOURCE_ADDR_MASK 0x1E
-#define IXGBE_FTQF_DEST_ADDR_MASK 0x1D
-#define IXGBE_FTQF_SOURCE_PORT_MASK 0x1B
-#define IXGBE_FTQF_DEST_PORT_MASK 0x17
-#define IXGBE_FTQF_PROTOCOL_COMP_MASK 0x0F
-#define IXGBE_FTQF_POOL_MASK_EN 0x40000000
-#define IXGBE_FTQF_QUEUE_ENABLE 0x80000000
-
-/* Interrupt clear mask */
-#define IXGBE_IRQ_CLEAR_MASK 0xFFFFFFFF
-
-/* Interrupt Vector Allocation Registers */
-#define IXGBE_IVAR_REG_NUM 25
-#define IXGBE_IVAR_REG_NUM_82599 64
-#define IXGBE_IVAR_TXRX_ENTRY 96
-#define IXGBE_IVAR_RX_ENTRY 64
-#define IXGBE_IVAR_RX_QUEUE(_i) (0 + (_i))
-#define IXGBE_IVAR_TX_QUEUE(_i) (64 + (_i))
-#define IXGBE_IVAR_TX_ENTRY 32
-
-#define IXGBE_IVAR_TCP_TIMER_INDEX 96 /* 0 based index */
-#define IXGBE_IVAR_OTHER_CAUSES_INDEX 97 /* 0 based index */
-
-#define IXGBE_MSIX_VECTOR(_i) (0 + (_i))
-
-#define IXGBE_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */
-
-/* ETYPE Queue Filter/Select Bit Masks */
-#define IXGBE_MAX_ETQF_FILTERS 8
-#define IXGBE_ETQF_FCOE 0x08000000 /* bit 27 */
-#define IXGBE_ETQF_BCN 0x10000000 /* bit 28 */
-#define IXGBE_ETQF_1588 0x40000000 /* bit 30 */
-#define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */
-#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */
-
-#define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */
-#define IXGBE_ETQS_RX_QUEUE_SHIFT 16
-#define IXGBE_ETQS_LLI 0x20000000 /* bit 29 */
-#define IXGBE_ETQS_QUEUE_EN 0x80000000 /* bit 31 */
-
-/*
- * ETQF filter list: one static filter per filter consumer. This is
- * to avoid filter collisions later. Add new filters
- * here!!
- *
- * Current filters:
- * EAPOL 802.1x (0x888e): Filter 0
- * FCoE (0x8906): Filter 2
- * 1588 (0x88f7): Filter 3
- * FIP (0x8914): Filter 4
- */
-#define IXGBE_ETQF_FILTER_EAPOL 0
-#define IXGBE_ETQF_FILTER_FCOE 2
-#define IXGBE_ETQF_FILTER_1588 3
-#define IXGBE_ETQF_FILTER_FIP 4
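
For illustration, a minimal sketch of how one of the static slots reserved above might be programmed, steering IEEE 1588 frames (EtherType 0x88f7, per the list) to a given Rx queue. The IXGBE_ETQF(i)/IXGBE_ETQS(i) register offset macros and the IXGBE_WRITE_REG() helper are assumed to come from elsewhere in this driver; the function name and rx_queue parameter are hypothetical:

/* Hypothetical sketch: enable the 1588 EtherType filter and bind it to rx_queue */
static void example_setup_1588_etqf(struct ixgbe_hw *hw, u32 rx_queue)
{
	u32 etqf = 0x88F7 | IXGBE_ETQF_1588 | IXGBE_ETQF_FILTER_EN;
	u32 etqs = (rx_queue << IXGBE_ETQS_RX_QUEUE_SHIFT) | IXGBE_ETQS_QUEUE_EN;

	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), etqf);
	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_1588), etqs);
}
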
-/* VLAN Control Bit Masks */
-#define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */
-#define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */
-#define IXGBE_VLNCTRL_CFIEN 0x20000000 /* bit 29 */
-#define IXGBE_VLNCTRL_VFE 0x40000000 /* bit 30 */
-#define IXGBE_VLNCTRL_VME 0x80000000 /* bit 31 */
-
-/* VLAN pool filtering masks */
-#define IXGBE_VLVF_VIEN 0x80000000 /* filter is valid */
-#define IXGBE_VLVF_ENTRIES 64
-#define IXGBE_VLVF_VLANID_MASK 0x00000FFF
-/* Per VF Port VLAN insertion rules */
-#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */
-#define IXGBE_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */
-
-#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */
-
-/* STATUS Bit Masks */
-#define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */
-#define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift */
-#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Ena Status */
-
-#define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */
-#define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */
-
-/* ESDP Bit Masks */
-#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */
-#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */
-#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */
-#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */
-#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */
-#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */
-#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */
-#define IXGBE_ESDP_SDP0_DIR 0x00000100 /* SDP0 IO direction */
-#define IXGBE_ESDP_SDP1_DIR 0x00000200 /* SDP1 IO direction */
-#define IXGBE_ESDP_SDP4_DIR 0x00001000 /* SDP4 IO direction */
-#define IXGBE_ESDP_SDP5_DIR 0x00002000 /* SDP5 IO direction */
-#define IXGBE_ESDP_SDP0_NATIVE 0x00010000 /* SDP0 IO mode */
-#define IXGBE_ESDP_SDP1_NATIVE 0x00020000 /* SDP1 IO mode */
-
-
-/* LEDCTL Bit Masks */
-#define IXGBE_LED_IVRT_BASE 0x00000040
-#define IXGBE_LED_BLINK_BASE 0x00000080
-#define IXGBE_LED_MODE_MASK_BASE 0x0000000F
-#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i)))
-#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i))
-#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
-#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
-#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
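
As a concrete reading of the OFFSET/SHIFT macros above, the per-LED control fields simply occupy byte _i of LEDCTL; evaluating them by hand for LED index 2 (a hypothetical choice) gives:

/*
 * IXGBE_LED_MODE_SHIFT(2) = 16
 * IXGBE_LED_MODE_MASK(2)  = 0x0F << 16 = 0x000F0000
 * IXGBE_LED_IVRT(2)       = 0x40 << 16 = 0x00400000
 * IXGBE_LED_BLINK(2)      = 0x80 << 16 = 0x00800000
 */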
-
-/* LED modes */
-#define IXGBE_LED_LINK_UP 0x0
-#define IXGBE_LED_LINK_10G 0x1
-#define IXGBE_LED_MAC 0x2
-#define IXGBE_LED_FILTER 0x3
-#define IXGBE_LED_LINK_ACTIVE 0x4
-#define IXGBE_LED_LINK_1G 0x5
-#define IXGBE_LED_ON 0xE
-#define IXGBE_LED_OFF 0xF
-
-/* AUTOC Bit Masks */
-#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000
-#define IXGBE_AUTOC_KX4_SUPP 0x80000000
-#define IXGBE_AUTOC_KX_SUPP 0x40000000
-#define IXGBE_AUTOC_PAUSE 0x30000000
-#define IXGBE_AUTOC_ASM_PAUSE 0x20000000
-#define IXGBE_AUTOC_SYM_PAUSE 0x10000000
-#define IXGBE_AUTOC_RF 0x08000000
-#define IXGBE_AUTOC_PD_TMR 0x06000000
-#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000
-#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000
-#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000
-#define IXGBE_AUTOC_FECA 0x00040000
-#define IXGBE_AUTOC_FECR 0x00020000
-#define IXGBE_AUTOC_KR_SUPP 0x00010000
-#define IXGBE_AUTOC_AN_RESTART 0x00001000
-#define IXGBE_AUTOC_FLU 0x00000001
-#define IXGBE_AUTOC_LMS_SHIFT 13
-#define IXGBE_AUTOC_LMS_10G_SERIAL (0x3 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_KX4_KX_KR (0x4 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_SGMII_1G_100M (0x5 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_MASK (0x7 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN (0x0 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN (0x1 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_1G_AN (0x2 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_KX4_AN (0x4 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT)
-#define IXGBE_AUTOC_LMS_ATTACH_TYPE (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
-
-#define IXGBE_AUTOC_1G_PMA_PMD_MASK 0x00000200
-#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9
-#define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180
-#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7
-#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
-
-#define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000
-#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000
-#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16
-#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
-#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
-
-#define IXGBE_MACC_FLU 0x00000001
-#define IXGBE_MACC_FSV_10G 0x00030000
-#define IXGBE_MACC_FS 0x00040000
-#define IXGBE_MAC_RX2TX_LPBK 0x00000002
-
-/* LINKS Bit Masks */
-#define IXGBE_LINKS_KX_AN_COMP 0x80000000
-#define IXGBE_LINKS_UP 0x40000000
-#define IXGBE_LINKS_SPEED 0x20000000
-#define IXGBE_LINKS_MODE 0x18000000
-#define IXGBE_LINKS_RX_MODE 0x06000000
-#define IXGBE_LINKS_TX_MODE 0x01800000
-#define IXGBE_LINKS_XGXS_EN 0x00400000
-#define IXGBE_LINKS_SGMII_EN 0x02000000
-#define IXGBE_LINKS_PCS_1G_EN 0x00200000
-#define IXGBE_LINKS_1G_AN_EN 0x00100000
-#define IXGBE_LINKS_KX_AN_IDLE 0x00080000
-#define IXGBE_LINKS_1G_SYNC 0x00040000
-#define IXGBE_LINKS_10G_ALIGN 0x00020000
-#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000
-#define IXGBE_LINKS_TL_FAULT 0x00001000
-#define IXGBE_LINKS_SIGNAL 0x00000F00
-
-#define IXGBE_LINKS_SPEED_82599 0x30000000
-#define IXGBE_LINKS_SPEED_10G_82599 0x30000000
-#define IXGBE_LINKS_SPEED_1G_82599 0x20000000
-#define IXGBE_LINKS_SPEED_100_82599 0x10000000
-#define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */
-#define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */
-
-#define IXGBE_LINKS2_AN_SUPPORTED 0x00000040
-
-/* PCS1GLSTA Bit Masks */
-#define IXGBE_PCS1GLSTA_LINK_OK 1
-#define IXGBE_PCS1GLSTA_SYNK_OK 0x10
-#define IXGBE_PCS1GLSTA_AN_COMPLETE 0x10000
-#define IXGBE_PCS1GLSTA_AN_PAGE_RX 0x20000
-#define IXGBE_PCS1GLSTA_AN_TIMED_OUT 0x40000
-#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000
-#define IXGBE_PCS1GLSTA_AN_ERROR_RWS 0x100000
-
-#define IXGBE_PCS1GANA_SYM_PAUSE 0x80
-#define IXGBE_PCS1GANA_ASM_PAUSE 0x100
-
-/* PCS1GLCTL Bit Masks */
-#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to en */
-#define IXGBE_PCS1GLCTL_FLV_LINK_UP 1
-#define IXGBE_PCS1GLCTL_FORCE_LINK 0x20
-#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH 0x40
-#define IXGBE_PCS1GLCTL_AN_ENABLE 0x10000
-#define IXGBE_PCS1GLCTL_AN_RESTART 0x20000
-
-/* ANLP1 Bit Masks */
-#define IXGBE_ANLP1_PAUSE 0x0C00
-#define IXGBE_ANLP1_SYM_PAUSE 0x0400
-#define IXGBE_ANLP1_ASM_PAUSE 0x0800
-#define IXGBE_ANLP1_AN_STATE_MASK 0x000f0000
-
-/* SW Semaphore Register bitmasks */
-#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
-#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
-#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */
-#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */
-
-/* SW_FW_SYNC/GSSR definitions */
-#define IXGBE_GSSR_EEP_SM 0x0001
-#define IXGBE_GSSR_PHY0_SM 0x0002
-#define IXGBE_GSSR_PHY1_SM 0x0004
-#define IXGBE_GSSR_MAC_CSR_SM 0x0008
-#define IXGBE_GSSR_FLASH_SM 0x0010
-#define IXGBE_GSSR_SW_MNG_SM 0x0400
-
-/* FW Status register bitmask */
-#define IXGBE_FWSTS_FWRI 0x00000200 /* Firmware Reset Indication */
-
-/* EEC Register */
-#define IXGBE_EEC_SK 0x00000001 /* EEPROM Clock */
-#define IXGBE_EEC_CS 0x00000002 /* EEPROM Chip Select */
-#define IXGBE_EEC_DI 0x00000004 /* EEPROM Data In */
-#define IXGBE_EEC_DO 0x00000008 /* EEPROM Data Out */
-#define IXGBE_EEC_FWE_MASK 0x00000030 /* FLASH Write Enable */
-#define IXGBE_EEC_FWE_DIS 0x00000010 /* Disable FLASH writes */
-#define IXGBE_EEC_FWE_EN 0x00000020 /* Enable FLASH writes */
-#define IXGBE_EEC_FWE_SHIFT 4
-#define IXGBE_EEC_REQ 0x00000040 /* EEPROM Access Request */
-#define IXGBE_EEC_GNT 0x00000080 /* EEPROM Access Grant */
-#define IXGBE_EEC_PRES 0x00000100 /* EEPROM Present */
-#define IXGBE_EEC_ARD 0x00000200 /* EEPROM Auto Read Done */
-#define IXGBE_EEC_FLUP 0x00800000 /* Flash update command */
-#define IXGBE_EEC_SEC1VAL 0x02000000 /* Sector 1 Valid */
-#define IXGBE_EEC_FLUDONE 0x04000000 /* Flash update done */
-/* EEPROM Addressing bits based on type (0-small, 1-large) */
-#define IXGBE_EEC_ADDR_SIZE 0x00000400
-#define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */
-#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD allows 14 bits for addr. */
-
-#define IXGBE_EEC_SIZE_SHIFT 11
-#define IXGBE_EEPROM_WORD_SIZE_SHIFT 6
-#define IXGBE_EEPROM_OPCODE_BITS 8
-
-/* Part Number String Length */
-#define IXGBE_PBANUM_LENGTH 11
-
-/* Checksum and EEPROM pointers */
-#define IXGBE_PBANUM_PTR_GUARD 0xFAFA
-#define IXGBE_EEPROM_CHECKSUM 0x3F
-#define IXGBE_EEPROM_SUM 0xBABA
-#define IXGBE_PCIE_ANALOG_PTR 0x03
-#define IXGBE_ATLAS0_CONFIG_PTR 0x04
-#define IXGBE_PHY_PTR 0x04
-#define IXGBE_ATLAS1_CONFIG_PTR 0x05
-#define IXGBE_OPTION_ROM_PTR 0x05
-#define IXGBE_PCIE_GENERAL_PTR 0x06
-#define IXGBE_PCIE_CONFIG0_PTR 0x07
-#define IXGBE_PCIE_CONFIG1_PTR 0x08
-#define IXGBE_CORE0_PTR 0x09
-#define IXGBE_CORE1_PTR 0x0A
-#define IXGBE_MAC0_PTR 0x0B
-#define IXGBE_MAC1_PTR 0x0C
-#define IXGBE_CSR0_CONFIG_PTR 0x0D
-#define IXGBE_CSR1_CONFIG_PTR 0x0E
-#define IXGBE_FW_PTR 0x0F
-#define IXGBE_PBANUM0_PTR 0x15
-#define IXGBE_PBANUM1_PTR 0x16
-#define IXGBE_ALT_MAC_ADDR_PTR 0x37
-#define IXGBE_FREE_SPACE_PTR 0x3E
-
-/* External Thermal Sensor Config */
-#define IXGBE_ETS_CFG 0x26
-#define IXGBE_ETS_LTHRES_DELTA_MASK 0x07C0
-#define IXGBE_ETS_LTHRES_DELTA_SHIFT 6
-#define IXGBE_ETS_TYPE_MASK 0x0038
-#define IXGBE_ETS_TYPE_SHIFT 3
-#define IXGBE_ETS_TYPE_EMC 0x000
-#define IXGBE_ETS_NUM_SENSORS_MASK 0x0007
-#define IXGBE_ETS_DATA_LOC_MASK 0x3C00
-#define IXGBE_ETS_DATA_LOC_SHIFT 10
-#define IXGBE_ETS_DATA_INDEX_MASK 0x0300
-#define IXGBE_ETS_DATA_INDEX_SHIFT 8
-#define IXGBE_ETS_DATA_HTHRESH_MASK 0x00FF
-
-#define IXGBE_SAN_MAC_ADDR_PTR 0x28
-#define IXGBE_DEVICE_CAPS 0x2C
-#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11
-#define IXGBE_PCIE_MSIX_82599_CAPS 0x72
-#define IXGBE_MAX_MSIX_VECTORS_82599 0x40
-#define IXGBE_PCIE_MSIX_82598_CAPS 0x62
-#define IXGBE_MAX_MSIX_VECTORS_82598 0x13
-
-/* MSI-X capability fields masks */
-#define IXGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF
-
-/* Legacy EEPROM word offsets */
-#define IXGBE_ISCSI_BOOT_CAPS 0x0033
-#define IXGBE_ISCSI_SETUP_PORT_0 0x0030
-#define IXGBE_ISCSI_SETUP_PORT_1 0x0034
-
-/* EEPROM Commands - SPI */
-#define IXGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */
-#define IXGBE_EEPROM_STATUS_RDY_SPI 0x01
-#define IXGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */
-#define IXGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */
-#define IXGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */
-#define IXGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */
-/* EEPROM reset Write Enable latch */
-#define IXGBE_EEPROM_WRDI_OPCODE_SPI 0x04
-#define IXGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */
-#define IXGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */
-#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */
-#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */
-#define IXGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */
-
-/* EEPROM Read Register */
-#define IXGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */
-#define IXGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */
-#define IXGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */
-#define IXGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
-#define IXGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */
-#define IXGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */
-
-#define IXGBE_ETH_LENGTH_OF_ADDRESS 6
-
-#define IXGBE_EEPROM_PAGE_SIZE_MAX 128
-#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* words rd in burst */
-#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */
-
-#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS
-#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */
-#endif
-
-#ifndef IXGBE_EERD_EEWR_ATTEMPTS
-/* Number of 5-microsecond intervals we wait for an EERD read or
- * an EEWR write to complete */
-#define IXGBE_EERD_EEWR_ATTEMPTS 100000
-#endif
-
-#ifndef IXGBE_FLUDONE_ATTEMPTS
-/* # attempts we wait for flash update to complete */
-#define IXGBE_FLUDONE_ATTEMPTS 20000
-#endif
-
-#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */
-#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */
-#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */
-#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */
-
-#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0
-#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3
-#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1
-#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2
-#define IXGBE_FW_LESM_PARAMETERS_PTR 0x2
-#define IXGBE_FW_LESM_STATE_1 0x1
-#define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */
-#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4
-#define IXGBE_FW_PATCH_VERSION_4 0x7
-#define IXGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */
-#define IXGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */
-#define IXGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */
-#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */
-#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */
-#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x27 /* Alt. SAN MAC block */
-#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */
-#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */
-#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */
-#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */
-#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */
-#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */
-#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */
-
-#define IXGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */
-#define IXGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */
-#define IXGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */
-
-/* PCI Bus Info */
-#define IXGBE_PCI_DEVICE_STATUS 0xAA
-#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020
-#define IXGBE_PCI_LINK_STATUS 0xB2
-#define IXGBE_PCI_DEVICE_CONTROL2 0xC8
-#define IXGBE_PCI_LINK_WIDTH 0x3F0
-#define IXGBE_PCI_LINK_WIDTH_1 0x10
-#define IXGBE_PCI_LINK_WIDTH_2 0x20
-#define IXGBE_PCI_LINK_WIDTH_4 0x40
-#define IXGBE_PCI_LINK_WIDTH_8 0x80
-#define IXGBE_PCI_LINK_SPEED 0xF
-#define IXGBE_PCI_LINK_SPEED_2500 0x1
-#define IXGBE_PCI_LINK_SPEED_5000 0x2
-#define IXGBE_PCI_LINK_SPEED_8000 0x3
-#define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E
-#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80
-#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005
-
-/* Number of 100 microseconds we wait for PCI Express master disable */
-#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800
-
-/* Check whether an address is multicast. This is a little-endian-specific check. */
-#define IXGBE_IS_MULTICAST(Address) \
- (bool)(((u8 *)(Address))[0] & ((u8)0x01))
-
-/* Check whether an address is broadcast. */
-#define IXGBE_IS_BROADCAST(Address) \
- ((((u8 *)(Address))[0] == ((u8)0xff)) && \
- (((u8 *)(Address))[1] == ((u8)0xff)))
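
A short usage sketch for the two checks above (the address bytes are hypothetical). Because a broadcast address also passes the multicast test, broadcast is checked first:

u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS] = { 0x01, 0x00, 0x5E, 0x00, 0x00, 0x01 };

if (IXGBE_IS_BROADCAST(addr)) {
	/* first two octets are 0xff: treat as broadcast */
} else if (IXGBE_IS_MULTICAST(addr)) {
	/* I/G bit of octet 0 is set: true for this example address */
}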
-
-/* RAH */
-#define IXGBE_RAH_VIND_MASK 0x003C0000
-#define IXGBE_RAH_VIND_SHIFT 18
-#define IXGBE_RAH_AV 0x80000000
-#define IXGBE_CLEAR_VMDQ_ALL 0xFFFFFFFF
-
-/* Header split receive */
-#define IXGBE_RFCTL_ISCSI_DIS 0x00000001
-#define IXGBE_RFCTL_ISCSI_DWC_MASK 0x0000003E
-#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1
-#define IXGBE_RFCTL_RSC_DIS 0x00000010
-#define IXGBE_RFCTL_NFSW_DIS 0x00000040
-#define IXGBE_RFCTL_NFSR_DIS 0x00000080
-#define IXGBE_RFCTL_NFS_VER_MASK 0x00000300
-#define IXGBE_RFCTL_NFS_VER_SHIFT 8
-#define IXGBE_RFCTL_NFS_VER_2 0
-#define IXGBE_RFCTL_NFS_VER_3 1
-#define IXGBE_RFCTL_NFS_VER_4 2
-#define IXGBE_RFCTL_IPV6_DIS 0x00000400
-#define IXGBE_RFCTL_IPV6_XSUM_DIS 0x00000800
-#define IXGBE_RFCTL_IPFRSP_DIS 0x00004000
-#define IXGBE_RFCTL_IPV6_EX_DIS 0x00010000
-#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
-
-/* Transmit Config masks */
-#define IXGBE_TXDCTL_ENABLE 0x02000000 /* Ena specific Tx Queue */
-#define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */
-#define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */
-/* Enable short packet padding to 64 bytes */
-#define IXGBE_TX_PAD_ENABLE 0x00000400
-#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004 /* Allow jumbo frames */
-/* This allows for 16K packets + 4K for VLAN */
-#define IXGBE_MAX_FRAME_SZ 0x40040000
-
-#define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */
-#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */
-
-/* Receive Config masks */
-#define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */
-#define IXGBE_RXCTRL_DMBYPS 0x00000002 /* Desc Monitor Bypass */
-#define IXGBE_RXDCTL_ENABLE 0x02000000 /* Ena specific Rx Queue */
-#define IXGBE_RXDCTL_SWFLSH 0x04000000 /* Rx Desc wr-bk flushing */
-#define IXGBE_RXDCTL_RLPMLMASK 0x00003FFF /* X540 supported only */
-#define IXGBE_RXDCTL_RLPML_EN 0x00008000
-#define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */
-
-#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
-#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */
-
-#define IXGBE_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
-#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
-#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00
-#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02
-#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
-#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
-#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */
-
-#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF
-#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00
-#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG 0x01
-#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG 0x02
-#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG 0x03
-#define IXGBE_RXMTRL_V1_MGMT_MSG 0x04
-
-#define IXGBE_RXMTRL_V2_MSGID_MASK 0x0000FF00
-#define IXGBE_RXMTRL_V2_SYNC_MSG 0x0000
-#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG 0x0100
-#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG 0x0200
-#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG 0x0300
-#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG 0x0800
-#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG 0x0900
-#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00
-#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG 0x0B00
-#define IXGBE_RXMTRL_V2_SIGNALLING_MSG 0x0C00
-#define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00
-
-#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */
-#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena */
-#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */
-#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */
-#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */
-#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */
-/* Receive Priority Flow Control Enable */
-#define IXGBE_FCTRL_RPFCE 0x00004000
-#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */
-#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */
-#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */
-#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */
-#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */
-#define IXGBE_MFLCN_RPFCE_MASK 0x00000FF4 /* Rx Priority FC bitmap mask */
-#define IXGBE_MFLCN_RPFCE_SHIFT 4 /* Rx Priority FC bitmap shift */
-
-/* Multiple Receive Queue Control */
-#define IXGBE_MRQC_RSSEN 0x00000001 /* RSS Enable */
-#define IXGBE_MRQC_MRQE_MASK 0xF /* Bits 3:0 */
-#define IXGBE_MRQC_RT8TCEN 0x00000002 /* 8 TC no RSS */
-#define IXGBE_MRQC_RT4TCEN 0x00000003 /* 4 TC no RSS */
-#define IXGBE_MRQC_RTRSS8TCEN 0x00000004 /* 8 TC w/ RSS */
-#define IXGBE_MRQC_RTRSS4TCEN 0x00000005 /* 4 TC w/ RSS */
-#define IXGBE_MRQC_VMDQEN 0x00000008 /* VMDq2 64 pools no RSS */
-#define IXGBE_MRQC_VMDQRSS32EN 0x0000000A /* VMDq2 32 pools w/ RSS */
-#define IXGBE_MRQC_VMDQRSS64EN 0x0000000B /* VMDq2 64 pools w/ RSS */
-#define IXGBE_MRQC_VMDQRT8TCEN 0x0000000C /* VMDq2/RT 16 pool 8 TC */
-#define IXGBE_MRQC_VMDQRT4TCEN 0x0000000D /* VMDq2/RT 32 pool 4 TC */
-#define IXGBE_MRQC_RSS_FIELD_MASK 0xFFFF0000
-#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP 0x00010000
-#define IXGBE_MRQC_RSS_FIELD_IPV4 0x00020000
-#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000
-#define IXGBE_MRQC_RSS_FIELD_IPV6_EX 0x00080000
-#define IXGBE_MRQC_RSS_FIELD_IPV6 0x00100000
-#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP 0x00200000
-#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
-#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
-#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000
-#define IXGBE_MRQC_L3L4TXSWEN 0x00008000
-
-/* Queue Drop Enable */
-#define IXGBE_QDE_ENABLE 0x00000001
-#define IXGBE_QDE_IDX_MASK 0x00007F00
-#define IXGBE_QDE_IDX_SHIFT 8
-#define IXGBE_QDE_WRITE 0x00010000
-#define IXGBE_QDE_READ 0x00020000
-
-#define IXGBE_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
-#define IXGBE_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
-#define IXGBE_TXD_CMD_EOP 0x01000000 /* End of Packet */
-#define IXGBE_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
-#define IXGBE_TXD_CMD_IC 0x04000000 /* Insert Checksum */
-#define IXGBE_TXD_CMD_RS 0x08000000 /* Report Status */
-#define IXGBE_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
-#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
-#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */
-
-#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
-#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000
-#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
-/* Multiple Transmit Queue Command Register */
-#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */
-#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */
-#define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 packet buffer */
-#define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */
-#define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */
-#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */
-#define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */
-
-/* Receive Descriptor bit definitions */
-#define IXGBE_RXD_STAT_DD 0x01 /* Descriptor Done */
-#define IXGBE_RXD_STAT_EOP 0x02 /* End of Packet */
-#define IXGBE_RXD_STAT_FLM 0x04 /* FDir Match */
-#define IXGBE_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
-#define IXGBE_RXDADV_NEXTP_MASK 0x000FFFF0 /* Next Descriptor Index */
-#define IXGBE_RXDADV_NEXTP_SHIFT 0x00000004
-#define IXGBE_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
-#define IXGBE_RXD_STAT_L4CS 0x20 /* L4 xsum calculated */
-#define IXGBE_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
-#define IXGBE_RXD_STAT_PIF 0x80 /* passed in-exact filter */
-#define IXGBE_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */
-#define IXGBE_RXD_STAT_VEXT 0x200 /* 1st VLAN found */
-#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */
-#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */
-#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */
-#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */
-#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */
-#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */
-#define IXGBE_RXD_STAT_ACK 0x8000 /* ACK Packet indication */
-#define IXGBE_RXD_ERR_CE 0x01 /* CRC Error */
-#define IXGBE_RXD_ERR_LE 0x02 /* Length Error */
-#define IXGBE_RXD_ERR_PE 0x08 /* Packet Error */
-#define IXGBE_RXD_ERR_OSE 0x10 /* Oversize Error */
-#define IXGBE_RXD_ERR_USE 0x20 /* Undersize Error */
-#define IXGBE_RXD_ERR_TCPE 0x40 /* TCP/UDP Checksum Error */
-#define IXGBE_RXD_ERR_IPE 0x80 /* IP Checksum Error */
-#define IXGBE_RXDADV_ERR_MASK 0xfff00000 /* RDESC.ERRORS mask */
-#define IXGBE_RXDADV_ERR_SHIFT 20 /* RDESC.ERRORS shift */
-#define IXGBE_RXDADV_ERR_RXE 0x20000000 /* Any MAC Error */
-#define IXGBE_RXDADV_ERR_FCEOFE 0x80000000 /* FCoEFe/IPE */
-#define IXGBE_RXDADV_ERR_FCERR 0x00700000 /* FCERR/FDIRERR */
-#define IXGBE_RXDADV_ERR_FDIR_LEN 0x00100000 /* FDIR Length error */
-#define IXGBE_RXDADV_ERR_FDIR_DROP 0x00200000 /* FDIR Drop error */
-#define IXGBE_RXDADV_ERR_FDIR_COLL 0x00400000 /* FDIR Collision error */
-#define IXGBE_RXDADV_ERR_HBO 0x00800000 /* Header Buffer Overflow */
-#define IXGBE_RXDADV_ERR_CE 0x01000000 /* CRC Error */
-#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */
-#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */
-#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */
-#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */
-#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */
-#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */
-#define IXGBE_RXD_VLAN_ID_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
-#define IXGBE_RXD_PRI_MASK 0xE000 /* Priority is in upper 3 bits */
-#define IXGBE_RXD_PRI_SHIFT 13
-#define IXGBE_RXD_CFI_MASK 0x1000 /* CFI is bit 12 */
-#define IXGBE_RXD_CFI_SHIFT 12
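
For example, the VLAN ID and 802.1p priority can be pulled out of a descriptor's 16-bit VLAN field with the masks above (vlan_field is a hypothetical value read from the descriptor):

u16 vlan_field = rxd_vlan;                                    /* hypothetical input */
u16 vlan_id    = vlan_field & IXGBE_RXD_VLAN_ID_MASK;         /* bits 11:0 */
u8  user_prio  = (vlan_field & IXGBE_RXD_PRI_MASK) >> IXGBE_RXD_PRI_SHIFT; /* bits 15:13 */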
-
-#define IXGBE_RXDADV_STAT_DD IXGBE_RXD_STAT_DD /* Done */
-#define IXGBE_RXDADV_STAT_EOP IXGBE_RXD_STAT_EOP /* End of Packet */
-#define IXGBE_RXDADV_STAT_FLM IXGBE_RXD_STAT_FLM /* FDir Match */
-#define IXGBE_RXDADV_STAT_VP IXGBE_RXD_STAT_VP /* IEEE VLAN Pkt */
-#define IXGBE_RXDADV_STAT_MASK 0x000fffff /* Stat/NEXTP: bit 0-19 */
-#define IXGBE_RXDADV_STAT_FCEOFS 0x00000040 /* FCoE EOF/SOF Stat */
-#define IXGBE_RXDADV_STAT_FCSTAT 0x00000030 /* FCoE Pkt Stat */
-#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */
-#define IXGBE_RXDADV_STAT_FCSTAT_NODDP 0x00000010 /* 01: Ctxt w/o DDP */
-#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
-#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */
-#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE1588 Time Stamp */
-
-/* PSRTYPE bit definitions */
-#define IXGBE_PSRTYPE_TCPHDR 0x00000010
-#define IXGBE_PSRTYPE_UDPHDR 0x00000020
-#define IXGBE_PSRTYPE_IPV4HDR 0x00000100
-#define IXGBE_PSRTYPE_IPV6HDR 0x00000200
-#define IXGBE_PSRTYPE_L2HDR 0x00001000
-
-/* SRRCTL bit definitions */
-#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */
-#define IXGBE_SRRCTL_RDMTS_SHIFT 22
-#define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000
-#define IXGBE_SRRCTL_DROP_EN 0x10000000
-#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F
-#define IXGBE_SRRCTL_BSIZEHDR_MASK 0x00003F00
-#define IXGBE_SRRCTL_DESCTYPE_LEGACY 0x00000000
-#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
-#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
-#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
-#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
-#define IXGBE_SRRCTL_DESCTYPE_MASK 0x0E000000
-
-#define IXGBE_RXDPS_HDRSTAT_HDRSP 0x00008000
-#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF
-
-#define IXGBE_RXDADV_RSSTYPE_MASK 0x0000000F
-#define IXGBE_RXDADV_PKTTYPE_MASK 0x0000FFF0
-#define IXGBE_RXDADV_PKTTYPE_MASK_EX 0x0001FFF0
-#define IXGBE_RXDADV_HDRBUFLEN_MASK 0x00007FE0
-#define IXGBE_RXDADV_RSCCNT_MASK 0x001E0000
-#define IXGBE_RXDADV_RSCCNT_SHIFT 17
-#define IXGBE_RXDADV_HDRBUFLEN_SHIFT 5
-#define IXGBE_RXDADV_SPLITHEADER_EN 0x00001000
-#define IXGBE_RXDADV_SPH 0x8000
-
-/* RSS Hash results */
-#define IXGBE_RXDADV_RSSTYPE_NONE 0x00000000
-#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP 0x00000001
-#define IXGBE_RXDADV_RSSTYPE_IPV4 0x00000002
-#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP 0x00000003
-#define IXGBE_RXDADV_RSSTYPE_IPV6_EX 0x00000004
-#define IXGBE_RXDADV_RSSTYPE_IPV6 0x00000005
-#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
-#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP 0x00000007
-#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP 0x00000008
-#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
-
-/* RSS Packet Types as indicated in the receive descriptor. */
-#define IXGBE_RXDADV_PKTTYPE_NONE 0x00000000
-#define IXGBE_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPv4 hdr present */
-#define IXGBE_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPv4 hdr + extensions */
-#define IXGBE_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPv6 hdr present */
-#define IXGBE_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPv6 hdr + extensions */
-#define IXGBE_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */
-#define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */
-#define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */
-#define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */
-#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */
-#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */
-#define IXGBE_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */
-#define IXGBE_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */
-#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
-#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
-
-/* Security Processing bit Indication */
-#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
-
-/* Masks to determine if packets should be dropped due to frame errors */
-#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
- IXGBE_RXD_ERR_CE | \
- IXGBE_RXD_ERR_LE | \
- IXGBE_RXD_ERR_PE | \
- IXGBE_RXD_ERR_OSE | \
- IXGBE_RXD_ERR_USE)
-
-#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
- IXGBE_RXDADV_ERR_CE | \
- IXGBE_RXDADV_ERR_LE | \
- IXGBE_RXDADV_ERR_PE | \
- IXGBE_RXDADV_ERR_OSE | \
- IXGBE_RXDADV_ERR_USE)
-
-#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599 IXGBE_RXDADV_ERR_RXE
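
To show how these masks are consulted, a minimal receive-path sketch (the rxd pointer and the ring_next_completed() helper are hypothetical, and a little-endian host is assumed so no byte swap is shown; union ixgbe_adv_rx_desc is defined further down in this header):

union ixgbe_adv_rx_desc *rxd = ring_next_completed(rx_ring);  /* hypothetical helper */
u32 staterr = rxd->wb.upper.status_error;

if (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) {
	/* CRC, length, oversize or undersize error reported: drop this buffer */
}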
-
-/* Multicast bit mask */
-#define IXGBE_MCSTCTRL_MFE 0x4
-
-/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8
-#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8
-#define IXGBE_REQ_TX_BUFFER_GRANULARITY 1024
-
-/* VLAN-specific macros */
-#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */
-#define IXGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */
-#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */
-#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT
-
-/* SR-IOV specific macros */
-#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4)
-#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4))
-#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600))
-#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4))
-/* Translated register #defines */
-#define IXGBE_PVFCTRL(P) (0x00300 + (4 * (P)))
-#define IXGBE_PVFSTATUS(P) (0x00008 + (0 * (P)))
-#define IXGBE_PVFLINKS(P) (0x042A4 + (0 * (P)))
-#define IXGBE_PVFRTIMER(P) (0x00048 + (0 * (P)))
-#define IXGBE_PVFMAILBOX(P) (0x04C00 + (4 * (P)))
-#define IXGBE_PVFRXMEMWRAP(P) (0x03190 + (0 * (P)))
-#define IXGBE_PVTEICR(P) (0x00B00 + (4 * (P)))
-#define IXGBE_PVTEICS(P) (0x00C00 + (4 * (P)))
-#define IXGBE_PVTEIMS(P) (0x00D00 + (4 * (P)))
-#define IXGBE_PVTEIMC(P) (0x00E00 + (4 * (P)))
-#define IXGBE_PVTEIAC(P) (0x00F00 + (4 * (P)))
-#define IXGBE_PVTEIAM(P) (0x04D00 + (4 * (P)))
-#define IXGBE_PVTEITR(P) (((P) < 24) ? (0x00820 + ((P) * 4)) : \
- (0x012300 + (((P) - 24) * 4)))
-#define IXGBE_PVTIVAR(P) (0x12500 + (4 * (P)))
-#define IXGBE_PVTIVAR_MISC(P) (0x04E00 + (4 * (P)))
-#define IXGBE_PVTRSCINT(P) (0x12000 + (4 * (P)))
-#define IXGBE_VFPBACL(P) (0x110C8 + (4 * (P)))
-#define IXGBE_PVFRDBAL(P) ((P < 64) ? (0x01000 + (0x40 * (P))) \
- : (0x0D000 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFRDBAH(P) ((P < 64) ? (0x01004 + (0x40 * (P))) \
- : (0x0D004 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFRDLEN(P) ((P < 64) ? (0x01008 + (0x40 * (P))) \
- : (0x0D008 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFRDH(P) ((P < 64) ? (0x01010 + (0x40 * (P))) \
- : (0x0D010 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFRDT(P) ((P < 64) ? (0x01018 + (0x40 * (P))) \
- : (0x0D018 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFRXDCTL(P) ((P < 64) ? (0x01028 + (0x40 * (P))) \
- : (0x0D028 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFSRRCTL(P) ((P < 64) ? (0x01014 + (0x40 * (P))) \
- : (0x0D014 + (0x40 * ((P) - 64))))
-#define IXGBE_PVFPSRTYPE(P) (0x0EA00 + (4 * (P)))
-#define IXGBE_PVFTDBAL(P) (0x06000 + (0x40 * (P)))
-#define IXGBE_PVFTDBAH(P) (0x06004 + (0x40 * (P)))
-#define IXGBE_PVFTTDLEN(P) (0x06008 + (0x40 * (P)))
-#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P)))
-#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P)))
-#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P)))
-#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P)))
-#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P)))
-#define IXGBE_PVFDCA_RXCTRL(P) (((P) < 64) ? (0x0100C + (0x40 * (P))) \
- : (0x0D00C + (0x40 * ((P) - 64))))
-#define IXGBE_PVFDCA_TXCTRL(P) (0x0600C + (0x40 * (P)))
-#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x)))
-#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x)))
-#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x)))
-#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x)))
-#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x)))
-#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x)))
-#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x)))
-
-#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
- (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))
-#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \
- (IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index)))
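
A worked expansion of the queue-indexed helpers above, with hypothetical numbers: in 64-VF mode each pool owns 2 Tx queues, so the head write-back low register for VF 10, queue 1 is

/*
 * IXGBE_PVFTDWBALn(2, 10, 1)
 *   -> IXGBE_PVFTDWBAL(2 * 10 + 1)
 *   -> 0x06038 + 0x40 * 21 = 0x06578
 */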
-
-/* Little Endian defines */
-#ifndef __le16
-#define __le16 u16
-#endif
-#ifndef __le32
-#define __le32 u32
-#endif
-#ifndef __le64
-#define __le64 u64
-#endif
-
-/* Big Endian defines */
-#ifndef __be16
-#define __be16 u16
-#define __be32 u32
-#define __be64 u64
-#endif
-
-enum ixgbe_fdir_pballoc_type {
- IXGBE_FDIR_PBALLOC_NONE = 0,
- IXGBE_FDIR_PBALLOC_64K = 1,
- IXGBE_FDIR_PBALLOC_128K = 2,
- IXGBE_FDIR_PBALLOC_256K = 3,
-};
-
-/* Flow Director register values */
-#define IXGBE_FDIRCTRL_PBALLOC_64K 0x00000001
-#define IXGBE_FDIRCTRL_PBALLOC_128K 0x00000002
-#define IXGBE_FDIRCTRL_PBALLOC_256K 0x00000003
-#define IXGBE_FDIRCTRL_INIT_DONE 0x00000008
-#define IXGBE_FDIRCTRL_PERFECT_MATCH 0x00000010
-#define IXGBE_FDIRCTRL_REPORT_STATUS 0x00000020
-#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080
-#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8
-#define IXGBE_FDIRCTRL_FLEX_SHIFT 16
-#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000
-#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24
-#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000
-#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT 28
-
-#define IXGBE_FDIRTCPM_DPORTM_SHIFT 16
-#define IXGBE_FDIRUDPM_DPORTM_SHIFT 16
-#define IXGBE_FDIRIP6M_DIPM_SHIFT 16
-#define IXGBE_FDIRM_VLANID 0x00000001
-#define IXGBE_FDIRM_VLANP 0x00000002
-#define IXGBE_FDIRM_POOL 0x00000004
-#define IXGBE_FDIRM_L4P 0x00000008
-#define IXGBE_FDIRM_FLEX 0x00000010
-#define IXGBE_FDIRM_DIPv6 0x00000020
-
-#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF
-#define IXGBE_FDIRFREE_FREE_SHIFT 0
-#define IXGBE_FDIRFREE_COLL_MASK 0x7FFF0000
-#define IXGBE_FDIRFREE_COLL_SHIFT 16
-#define IXGBE_FDIRLEN_MAXLEN_MASK 0x3F
-#define IXGBE_FDIRLEN_MAXLEN_SHIFT 0
-#define IXGBE_FDIRLEN_MAXHASH_MASK 0x7FFF0000
-#define IXGBE_FDIRLEN_MAXHASH_SHIFT 16
-#define IXGBE_FDIRUSTAT_ADD_MASK 0xFFFF
-#define IXGBE_FDIRUSTAT_ADD_SHIFT 0
-#define IXGBE_FDIRUSTAT_REMOVE_MASK 0xFFFF0000
-#define IXGBE_FDIRUSTAT_REMOVE_SHIFT 16
-#define IXGBE_FDIRFSTAT_FADD_MASK 0x00FF
-#define IXGBE_FDIRFSTAT_FADD_SHIFT 0
-#define IXGBE_FDIRFSTAT_FREMOVE_MASK 0xFF00
-#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT 8
-#define IXGBE_FDIRPORT_DESTINATION_SHIFT 16
-#define IXGBE_FDIRVLAN_FLEX_SHIFT 16
-#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15
-#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16
-
-#define IXGBE_FDIRCMD_CMD_MASK 0x00000003
-#define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001
-#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002
-#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003
-#define IXGBE_FDIRCMD_FILTER_VALID 0x00000004
-#define IXGBE_FDIRCMD_FILTER_UPDATE 0x00000008
-#define IXGBE_FDIRCMD_IPv6DMATCH 0x00000010
-#define IXGBE_FDIRCMD_L4TYPE_UDP 0x00000020
-#define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040
-#define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060
-#define IXGBE_FDIRCMD_IPV6 0x00000080
-#define IXGBE_FDIRCMD_CLEARHT 0x00000100
-#define IXGBE_FDIRCMD_DROP 0x00000200
-#define IXGBE_FDIRCMD_INT 0x00000400
-#define IXGBE_FDIRCMD_LAST 0x00000800
-#define IXGBE_FDIRCMD_COLLISION 0x00001000
-#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000
-#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5
-#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16
-#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24
-#define IXGBE_FDIR_INIT_DONE_POLL 10
-#define IXGBE_FDIRCMD_CMD_POLL 10
-
-#define IXGBE_FDIR_DROP_QUEUE 127
-
-#define IXGBE_STATUS_OVERHEATING_BIT 20 /* STATUS overtemp bit num */
-
-/* Manageability Host Interface defines */
-#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */
-#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */
-#define IXGBE_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */
-
-/* CEM Support */
-#define FW_CEM_HDR_LEN 0x4
-#define FW_CEM_CMD_DRIVER_INFO 0xDD
-#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5
-#define FW_CEM_CMD_RESERVED 0x0
-#define FW_CEM_UNUSED_VER 0x0
-#define FW_CEM_MAX_RETRIES 3
-#define FW_CEM_RESP_STATUS_SUCCESS 0x1
-
-/* Host Interface Command Structures */
-
-struct ixgbe_hic_hdr {
- u8 cmd;
- u8 buf_len;
- union {
- u8 cmd_resv;
- u8 ret_status;
- } cmd_or_resp;
- u8 checksum;
-};
-
-struct ixgbe_hic_drv_info {
- struct ixgbe_hic_hdr hdr;
- u8 port_num;
- u8 ver_sub;
- u8 ver_build;
- u8 ver_min;
- u8 ver_maj;
- u8 pad; /* end spacing to ensure length is mult. of dword */
- u16 pad2; /* end spacing to ensure length is mult. of dword2 */
-};
-
-/* Transmit Descriptor - Legacy */
-struct ixgbe_legacy_tx_desc {
- u64 buffer_addr; /* Address of the descriptor's data buffer */
- union {
- __le32 data;
- struct {
- __le16 length; /* Data buffer length */
- u8 cso; /* Checksum offset */
- u8 cmd; /* Descriptor control */
- } flags;
- } lower;
- union {
- __le32 data;
- struct {
- u8 status; /* Descriptor status */
- u8 css; /* Checksum start */
- __le16 vlan;
- } fields;
- } upper;
-};
-
-/* Transmit Descriptor - Advanced */
-union ixgbe_adv_tx_desc {
- struct {
- __le64 buffer_addr; /* Address of descriptor's data buf */
- __le32 cmd_type_len;
- __le32 olinfo_status;
- } read;
- struct {
- __le64 rsvd; /* Reserved */
- __le32 nxtseq_seed;
- __le32 status;
- } wb;
-};
-
-/* Receive Descriptor - Legacy */
-struct ixgbe_legacy_rx_desc {
- __le64 buffer_addr; /* Address of the descriptor's data buffer */
- __le16 length; /* Length of data DMAed into data buffer */
- __le16 csum; /* Packet checksum */
- u8 status; /* Descriptor status */
- u8 errors; /* Descriptor Errors */
- __le16 vlan;
-};
-
-/* Receive Descriptor - Advanced */
-union ixgbe_adv_rx_desc {
- struct {
- __le64 pkt_addr; /* Packet buffer address */
- __le64 hdr_addr; /* Header buffer address */
- } read;
- struct {
- struct {
- union {
- __le32 data;
- struct {
- __le16 pkt_info; /* RSS, Pkt type */
- __le16 hdr_info; /* Splithdr, hdrlen */
- } hs_rss;
- } lo_dword;
- union {
- __le32 rss; /* RSS Hash */
- struct {
- __le16 ip_id; /* IP id */
- __le16 csum; /* Packet Checksum */
- } csum_ip;
- } hi_dword;
- } lower;
- struct {
- __le32 status_error; /* ext status/error */
- __le16 length; /* Packet length */
- __le16 vlan; /* VLAN tag */
- } upper;
- } wb; /* writeback */
-};
-
-/* Context descriptors */
-struct ixgbe_adv_tx_context_desc {
- __le32 vlan_macip_lens;
- __le32 seqnum_seed;
- __le32 type_tucmd_mlhl;
- __le32 mss_l4len_idx;
-};
-
-/* Adv Transmit Descriptor Config Masks */
-#define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */
-#define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */
-#define IXGBE_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 time stamp */
-#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */
-#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */
-#define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */
-#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Adv Context Desc */
-#define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Adv Data Descriptor */
-#define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */
-#define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */
-#define IXGBE_ADVTXD_DCMD_RS IXGBE_TXD_CMD_RS /* Report Status */
-#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */
-#define IXGBE_ADVTXD_DCMD_DEXT IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */
-#define IXGBE_ADVTXD_DCMD_VLE IXGBE_TXD_CMD_VLE /* VLAN pkt enable */
-#define IXGBE_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
-#define IXGBE_ADVTXD_STAT_DD IXGBE_TXD_STAT_DD /* Descriptor Done */
-#define IXGBE_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED pres in WB */
-#define IXGBE_ADVTXD_STAT_RSV 0x0000000C /* STA Reserved */
-#define IXGBE_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */
-#define IXGBE_ADVTXD_CC 0x00000080 /* Check Context */
-#define IXGBE_ADVTXD_POPTS_SHIFT 8 /* Adv desc POPTS shift */
-#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \
- IXGBE_ADVTXD_POPTS_SHIFT)
-#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
- IXGBE_ADVTXD_POPTS_SHIFT)
-#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
-#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
-#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
-/* 1st&Last TSO-full iSCSI PDU */
-#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800
-#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */
-#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
-#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
-#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
-#define IXGBE_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
-#define IXGBE_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
-#define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
-#define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
-#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
-#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */
-#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
-#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
-#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 /* ESP Encrypt Enable */
-#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */
-#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */
-#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */
-#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */
-#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */
-#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */
-#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */
-#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */
-#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */
-#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */
-#define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
-#define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
-
-/* Autonegotiation advertised speeds */
-typedef u32 ixgbe_autoneg_advertised;
-/* Link speed */
-typedef u32 ixgbe_link_speed;
-#define IXGBE_LINK_SPEED_UNKNOWN 0
-#define IXGBE_LINK_SPEED_100_FULL 0x0008
-#define IXGBE_LINK_SPEED_1GB_FULL 0x0020
-#define IXGBE_LINK_SPEED_10GB_FULL 0x0080
-#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \
- IXGBE_LINK_SPEED_10GB_FULL)
-#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \
- IXGBE_LINK_SPEED_1GB_FULL | \
- IXGBE_LINK_SPEED_10GB_FULL)
-
-
-/* Physical layer type */
-typedef u32 ixgbe_physical_layer;
-#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0
-#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x0001
-#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x0002
-#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x0004
-#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008
-#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x0010
-#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x0020
-#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x0040
-#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x0080
-#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x0100
-#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200
-#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400
-#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800
-#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000
-#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000
-#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000
-
-/* Flow Control Data Sheet defined values
- * Calculation and defines taken from 802.1bb Annex O
- */
-
-/* BitTimes (BT) conversion */
-#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024))
-#define IXGBE_B2BT(BT) (BT * 8)
-
-/* Calculate Delay to respond to PFC */
-#define IXGBE_PFC_D 672
-
-/* Calculate Cable Delay */
-#define IXGBE_CABLE_DC 5556 /* Delay Copper */
-#define IXGBE_CABLE_DO 5000 /* Delay Optical */
-
-/* Calculate Interface Delay X540 */
-#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */
-#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */
-#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */
-
-#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC)
-
-/* Calculate Interface Delay 82598, 82599 */
-#define IXGBE_PHY_D 12800
-#define IXGBE_MAC_D 4096
-#define IXGBE_XAUI_D (2 * 1024)
-
-#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D)
-
-/* Calculate Delay incurred from higher layer */
-#define IXGBE_HD 6144
-
-/* Calculate PCI Bus delay for low thresholds */
-#define IXGBE_PCI_DELAY 10000
-
-/* Calculate X540 delay value in bit times */
-#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \
- ((36 * \
- (IXGBE_B2BT(_max_frame_link) + \
- IXGBE_PFC_D + \
- (2 * IXGBE_CABLE_DC) + \
- (2 * IXGBE_ID_X540) + \
- IXGBE_HD) / 25 + 1) + \
- 2 * IXGBE_B2BT(_max_frame_tc))
-
-/* Calculate 82599, 82598 delay value in bit times */
-#define IXGBE_DV(_max_frame_link, _max_frame_tc) \
- ((36 * \
- (IXGBE_B2BT(_max_frame_link) + \
- IXGBE_PFC_D + \
- (2 * IXGBE_CABLE_DC) + \
- (2 * IXGBE_ID) + \
- IXGBE_HD) / 25 + 1) + \
- 2 * IXGBE_B2BT(_max_frame_tc))
-
-/* Calculate low threshold delay values */
-#define IXGBE_LOW_DV_X540(_max_frame_tc) \
- (2 * IXGBE_B2BT(_max_frame_tc) + \
- (36 * IXGBE_PCI_DELAY / 25) + 1)
-#define IXGBE_LOW_DV(_max_frame_tc) \
- (2 * IXGBE_LOW_DV_X540(_max_frame_tc))
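
To make the delay macros concrete, a worked evaluation for a hypothetical 1518-byte maximum frame on 82598/82599 (link and TC frame sizes equal), rounded up to kilobytes of packet-buffer headroom with IXGBE_BT2KB() from above:

/*
 * IXGBE_DV(1518, 1518)
 *   = (36 * (12144 + 672 + 11112 + 37888 + 6144) / 25 + 1) + 2 * 12144
 *   = 97863 + 24288
 *   = 122151 bit times
 *
 * IXGBE_BT2KB(122151) = (122151 + 8191) / 8192 = 15 KB
 */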
-
-/* Software ATR hash keys */
-#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2
-#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614
-
-/* Software ATR input stream values and masks */
-#define IXGBE_ATR_HASH_MASK 0x7fff
-#define IXGBE_ATR_L4TYPE_MASK 0x3
-#define IXGBE_ATR_L4TYPE_UDP 0x1
-#define IXGBE_ATR_L4TYPE_TCP 0x2
-#define IXGBE_ATR_L4TYPE_SCTP 0x3
-#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4
-enum ixgbe_atr_flow_type {
- IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0,
- IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1,
- IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2,
- IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3,
- IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4,
- IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5,
- IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6,
- IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7,
-};
-
-/* Flow Director ATR input struct. */
-union ixgbe_atr_input {
- /*
- * Byte layout in order, all values with MSB first:
- *
- * vm_pool - 1 byte
- * flow_type - 1 byte
- * vlan_id - 2 bytes
- * src_ip - 16 bytes
- * dst_ip - 16 bytes
- * src_port - 2 bytes
- * dst_port - 2 bytes
- * flex_bytes - 2 bytes
- * bkt_hash - 2 bytes
- */
- struct {
- u8 vm_pool;
- u8 flow_type;
- __be16 vlan_id;
- __be32 dst_ip[4];
- __be32 src_ip[4];
- __be16 src_port;
- __be16 dst_port;
- __be16 flex_bytes;
- __be16 bkt_hash;
- } formatted;
- __be32 dword_stream[11];
-};
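
A brief sketch of filling the formatted view for a hypothetical IPv4/TCP flow. As the layout comment notes, every multi-byte field is big-endian; htonl()/htons() below stand in for whatever byte-order helpers the surrounding OS shim provides:

union ixgbe_atr_input atr_in;

memset(&atr_in, 0, sizeof(atr_in));
atr_in.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
atr_in.formatted.vlan_id   = htons(100);         /* hypothetical VLAN 100 */
atr_in.formatted.src_ip[0] = htonl(0xC0A80001);  /* 192.168.0.1 */
atr_in.formatted.dst_ip[0] = htonl(0xC0A80002);  /* 192.168.0.2 */
atr_in.formatted.src_port  = htons(12345);
atr_in.formatted.dst_port  = htons(80);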
-
-/* Flow Director compressed ATR hash input struct */
-union ixgbe_atr_hash_dword {
- struct {
- u8 vm_pool;
- u8 flow_type;
- __be16 vlan_id;
- } formatted;
- __be32 ip;
- struct {
- __be16 src;
- __be16 dst;
- } port;
- __be16 flex_bytes;
- __be32 dword;
-};
-
-
-/*
- * Unavailable: The FCoE Boot Option ROM is not present in the flash.
- * Disabled: Present; boot order is not set for any targets on the port.
- * Enabled: Present; boot order is set for at least one target on the port.
- */
-enum ixgbe_fcoe_boot_status {
- ixgbe_fcoe_bootstatus_disabled = 0,
- ixgbe_fcoe_bootstatus_enabled = 1,
- ixgbe_fcoe_bootstatus_unavailable = 0xFFFF
-};
-
-enum ixgbe_eeprom_type {
- ixgbe_eeprom_uninitialized = 0,
- ixgbe_eeprom_spi,
- ixgbe_flash,
- ixgbe_eeprom_none /* No NVM support */
-};
-
-enum ixgbe_mac_type {
- ixgbe_mac_unknown = 0,
- ixgbe_mac_82598EB,
- ixgbe_mac_82599EB,
- ixgbe_mac_X540,
- ixgbe_num_macs
-};
-
-enum ixgbe_phy_type {
- ixgbe_phy_unknown = 0,
- ixgbe_phy_none,
- ixgbe_phy_tn,
- ixgbe_phy_aq,
- ixgbe_phy_cu_unknown,
- ixgbe_phy_qt,
- ixgbe_phy_xaui,
- ixgbe_phy_nl,
- ixgbe_phy_sfp_passive_tyco,
- ixgbe_phy_sfp_passive_unknown,
- ixgbe_phy_sfp_active_unknown,
- ixgbe_phy_sfp_avago,
- ixgbe_phy_sfp_ftl,
- ixgbe_phy_sfp_ftl_active,
- ixgbe_phy_sfp_unknown,
- ixgbe_phy_sfp_intel,
-	ixgbe_phy_sfp_unsupported, /* Enforce bit set with unsupported module */
- ixgbe_phy_generic
-};
-
-/*
- * SFP+ module type IDs:
- *
- * ID Module Type
- * =============
- * 0 SFP_DA_CU
- * 1 SFP_SR
- * 2 SFP_LR
- * 3 SFP_DA_CU_CORE0 - 82599-specific
- * 4 SFP_DA_CU_CORE1 - 82599-specific
- * 5 SFP_SR/LR_CORE0 - 82599-specific
- * 6 SFP_SR/LR_CORE1 - 82599-specific
- */
-enum ixgbe_sfp_type {
- ixgbe_sfp_type_da_cu = 0,
- ixgbe_sfp_type_sr = 1,
- ixgbe_sfp_type_lr = 2,
- ixgbe_sfp_type_da_cu_core0 = 3,
- ixgbe_sfp_type_da_cu_core1 = 4,
- ixgbe_sfp_type_srlr_core0 = 5,
- ixgbe_sfp_type_srlr_core1 = 6,
- ixgbe_sfp_type_da_act_lmt_core0 = 7,
- ixgbe_sfp_type_da_act_lmt_core1 = 8,
- ixgbe_sfp_type_1g_cu_core0 = 9,
- ixgbe_sfp_type_1g_cu_core1 = 10,
- ixgbe_sfp_type_1g_sx_core0 = 11,
- ixgbe_sfp_type_1g_sx_core1 = 12,
- ixgbe_sfp_type_not_present = 0xFFFE,
- ixgbe_sfp_type_unknown = 0xFFFF
-};
-
-enum ixgbe_media_type {
- ixgbe_media_type_unknown = 0,
- ixgbe_media_type_fiber,
- ixgbe_media_type_fiber_qsfp,
- ixgbe_media_type_fiber_lco,
- ixgbe_media_type_copper,
- ixgbe_media_type_backplane,
- ixgbe_media_type_cx4,
- ixgbe_media_type_virtual
-};
-
-/* Flow Control Settings */
-enum ixgbe_fc_mode {
- ixgbe_fc_none = 0,
- ixgbe_fc_rx_pause,
- ixgbe_fc_tx_pause,
- ixgbe_fc_full,
- ixgbe_fc_default
-};
-
-/* Smart Speed Settings */
-#define IXGBE_SMARTSPEED_MAX_RETRIES 3
-enum ixgbe_smart_speed {
- ixgbe_smart_speed_auto = 0,
- ixgbe_smart_speed_on,
- ixgbe_smart_speed_off
-};
-
-/* PCI bus types */
-enum ixgbe_bus_type {
- ixgbe_bus_type_unknown = 0,
- ixgbe_bus_type_pci,
- ixgbe_bus_type_pcix,
- ixgbe_bus_type_pci_express,
- ixgbe_bus_type_reserved
-};
-
-/* PCI bus speeds */
-enum ixgbe_bus_speed {
- ixgbe_bus_speed_unknown = 0,
- ixgbe_bus_speed_33 = 33,
- ixgbe_bus_speed_66 = 66,
- ixgbe_bus_speed_100 = 100,
- ixgbe_bus_speed_120 = 120,
- ixgbe_bus_speed_133 = 133,
- ixgbe_bus_speed_2500 = 2500,
- ixgbe_bus_speed_5000 = 5000,
- ixgbe_bus_speed_8000 = 8000,
- ixgbe_bus_speed_reserved
-};
-
-/* PCI bus widths */
-enum ixgbe_bus_width {
- ixgbe_bus_width_unknown = 0,
- ixgbe_bus_width_pcie_x1 = 1,
- ixgbe_bus_width_pcie_x2 = 2,
- ixgbe_bus_width_pcie_x4 = 4,
- ixgbe_bus_width_pcie_x8 = 8,
- ixgbe_bus_width_32 = 32,
- ixgbe_bus_width_64 = 64,
- ixgbe_bus_width_reserved
-};
-
-struct ixgbe_addr_filter_info {
- u32 num_mc_addrs;
- u32 rar_used_count;
- u32 mta_in_use;
- u32 overflow_promisc;
- bool user_set_promisc;
-};
-
-/* Bus parameters */
-struct ixgbe_bus_info {
- enum ixgbe_bus_speed speed;
- enum ixgbe_bus_width width;
- enum ixgbe_bus_type type;
-
- u16 func;
- u16 lan_id;
-};
-
-/* Flow control parameters */
-struct ixgbe_fc_info {
- u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl High-water */
- u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */
- u16 pause_time; /* Flow Control Pause timer */
- bool send_xon; /* Flow control send XON */
- bool strict_ieee; /* Strict IEEE mode */
- bool disable_fc_autoneg; /* Do not autonegotiate FC */
- bool fc_was_autonegged; /* Is current_mode the result of autonegging? */
- enum ixgbe_fc_mode current_mode; /* FC mode in effect */
- enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */
-};
-
-/* Statistics counters collected by the MAC */
-struct ixgbe_hw_stats {
- u64 crcerrs;
- u64 illerrc;
- u64 errbc;
- u64 mspdc;
- u64 mpctotal;
- u64 mpc[8];
- u64 mlfc;
- u64 mrfc;
- u64 rlec;
- u64 lxontxc;
- u64 lxonrxc;
- u64 lxofftxc;
- u64 lxoffrxc;
- u64 pxontxc[8];
- u64 pxonrxc[8];
- u64 pxofftxc[8];
- u64 pxoffrxc[8];
- u64 prc64;
- u64 prc127;
- u64 prc255;
- u64 prc511;
- u64 prc1023;
- u64 prc1522;
- u64 gprc;
- u64 bprc;
- u64 mprc;
- u64 gptc;
- u64 gorc;
- u64 gotc;
- u64 rnbc[8];
- u64 ruc;
- u64 rfc;
- u64 roc;
- u64 rjc;
- u64 mngprc;
- u64 mngpdc;
- u64 mngptc;
- u64 tor;
- u64 tpr;
- u64 tpt;
- u64 ptc64;
- u64 ptc127;
- u64 ptc255;
- u64 ptc511;
- u64 ptc1023;
- u64 ptc1522;
- u64 mptc;
- u64 bptc;
- u64 xec;
- u64 qprc[16];
- u64 qptc[16];
- u64 qbrc[16];
- u64 qbtc[16];
- u64 qprdc[16];
- u64 pxon2offc[8];
- u64 fdirustat_add;
- u64 fdirustat_remove;
- u64 fdirfstat_fadd;
- u64 fdirfstat_fremove;
- u64 fdirmatch;
- u64 fdirmiss;
- u64 fccrc;
- u64 fclast;
- u64 fcoerpdc;
- u64 fcoeprc;
- u64 fcoeptc;
- u64 fcoedwrc;
- u64 fcoedwtc;
- u64 fcoe_noddp;
- u64 fcoe_noddp_ext_buff;
- u64 ldpcec;
- u64 pcrc8ec;
- u64 b2ospc;
- u64 b2ogprc;
- u64 o2bgptc;
- u64 o2bspc;
-};
-
-/* forward declaration */
-struct ixgbe_hw;
-
-/* iterator type for walking multicast address lists */
-typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
- u32 *vmdq);
-
-/* Function pointer table */
-struct ixgbe_eeprom_operations {
- s32 (*init_params)(struct ixgbe_hw *);
- s32 (*read)(struct ixgbe_hw *, u16, u16 *);
- s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
- s32 (*write)(struct ixgbe_hw *, u16, u16);
- s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
- s32 (*validate_checksum)(struct ixgbe_hw *, u16 *);
- s32 (*update_checksum)(struct ixgbe_hw *);
- u16 (*calc_checksum)(struct ixgbe_hw *);
-};
-
-struct ixgbe_mac_operations {
- s32 (*init_hw)(struct ixgbe_hw *);
- s32 (*reset_hw)(struct ixgbe_hw *);
- s32 (*start_hw)(struct ixgbe_hw *);
- s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
- enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
- u32 (*get_supported_physical_layer)(struct ixgbe_hw *);
- s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
- s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
- s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *);
- s32 (*get_device_caps)(struct ixgbe_hw *, u16 *);
- s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
- s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *);
- s32 (*stop_adapter)(struct ixgbe_hw *);
- s32 (*get_bus_info)(struct ixgbe_hw *);
- void (*set_lan_id)(struct ixgbe_hw *);
- s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
- s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
- s32 (*setup_sfp)(struct ixgbe_hw *);
- s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
- s32 (*disable_sec_rx_path)(struct ixgbe_hw *);
- s32 (*enable_sec_rx_path)(struct ixgbe_hw *);
- s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16);
- void (*release_swfw_sync)(struct ixgbe_hw *, u16);
-
- /* Link */
- void (*disable_tx_laser)(struct ixgbe_hw *);
- void (*enable_tx_laser)(struct ixgbe_hw *);
- void (*flap_tx_laser)(struct ixgbe_hw *);
- s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool);
- s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
- s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
- bool *);
-
- /* Packet Buffer manipulation */
- void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int);
-
- /* LED */
- s32 (*led_on)(struct ixgbe_hw *, u32);
- s32 (*led_off)(struct ixgbe_hw *, u32);
- s32 (*blink_led_start)(struct ixgbe_hw *, u32);
- s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
-
- /* RAR, Multicast, VLAN */
- s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
- s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *);
- s32 (*clear_rar)(struct ixgbe_hw *, u32);
- s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32);
- s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
- s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
- s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
- s32 (*init_rx_addrs)(struct ixgbe_hw *);
- s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32,
- ixgbe_mc_addr_itr);
- s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32,
- ixgbe_mc_addr_itr, bool clear);
- s32 (*enable_mc)(struct ixgbe_hw *);
- s32 (*disable_mc)(struct ixgbe_hw *);
- s32 (*clear_vfta)(struct ixgbe_hw *);
- s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool);
- s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *);
- s32 (*init_uta_tables)(struct ixgbe_hw *);
- void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
- void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
-
- /* Flow Control */
- s32 (*fc_enable)(struct ixgbe_hw *);
-
- /* Manageability interface */
- s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
- s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
- s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
-};
-
-struct ixgbe_phy_operations {
- s32 (*identify)(struct ixgbe_hw *);
- s32 (*identify_sfp)(struct ixgbe_hw *);
- s32 (*init)(struct ixgbe_hw *);
- s32 (*reset)(struct ixgbe_hw *);
- s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
- s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
- s32 (*setup_link)(struct ixgbe_hw *);
- s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool,
- bool);
- s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
- s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *);
- s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
- s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
- s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
- s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
- void (*i2c_bus_clear)(struct ixgbe_hw *);
- s32 (*check_overtemp)(struct ixgbe_hw *);
-};
-
-struct ixgbe_eeprom_info {
- struct ixgbe_eeprom_operations ops;
- enum ixgbe_eeprom_type type;
- u32 semaphore_delay;
- u16 word_size;
- u16 address_bits;
- u16 word_page_size;
-};
-
-#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01
-struct ixgbe_mac_info {
- struct ixgbe_mac_operations ops;
- enum ixgbe_mac_type type;
- u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
- u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
- u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
- /* prefix for World Wide Node Name (WWNN) */
- u16 wwnn_prefix;
- /* prefix for World Wide Port Name (WWPN) */
- u16 wwpn_prefix;
-#define IXGBE_MAX_MTA 128
- u32 mta_shadow[IXGBE_MAX_MTA];
- s32 mc_filter_type;
- u32 mcft_size;
- u32 vft_size;
- u32 num_rar_entries;
- u32 rar_highwater;
- u32 rx_pb_size;
- u32 max_tx_queues;
- u32 max_rx_queues;
- u32 orig_autoc;
- u8 san_mac_rar_index;
- u32 orig_autoc2;
- u16 max_msix_vectors;
- bool arc_subsystem_valid;
- bool orig_link_settings_stored;
- bool autotry_restart;
- u8 flags;
- struct ixgbe_thermal_sensor_data thermal_sensor_data;
-};
-
-struct ixgbe_phy_info {
- struct ixgbe_phy_operations ops;
- enum ixgbe_phy_type type;
- u32 addr;
- u32 id;
- enum ixgbe_sfp_type sfp_type;
- bool sfp_setup_needed;
- u32 revision;
- enum ixgbe_media_type media_type;
- bool reset_disable;
- ixgbe_autoneg_advertised autoneg_advertised;
- enum ixgbe_smart_speed smart_speed;
- bool smart_speed_active;
- bool multispeed_fiber;
- bool reset_if_overtemp;
- bool qsfp_shared_i2c_bus;
-};
-
-#include "ixgbe_mbx.h"
-
-struct ixgbe_mbx_operations {
- void (*init_params)(struct ixgbe_hw *hw);
- s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
- s32 (*check_for_msg)(struct ixgbe_hw *, u16);
- s32 (*check_for_ack)(struct ixgbe_hw *, u16);
- s32 (*check_for_rst)(struct ixgbe_hw *, u16);
-};
-
-struct ixgbe_mbx_stats {
- u32 msgs_tx;
- u32 msgs_rx;
-
- u32 acks;
- u32 reqs;
- u32 rsts;
-};
-
-struct ixgbe_mbx_info {
- struct ixgbe_mbx_operations ops;
- struct ixgbe_mbx_stats stats;
- u32 timeout;
- u32 udelay;
- u32 v2p_mailbox;
- u16 size;
-};
-
-struct ixgbe_hw {
- u8 __iomem *hw_addr;
- void *back;
- struct ixgbe_mac_info mac;
- struct ixgbe_addr_filter_info addr_ctrl;
- struct ixgbe_fc_info fc;
- struct ixgbe_phy_info phy;
- struct ixgbe_eeprom_info eeprom;
- struct ixgbe_bus_info bus;
- struct ixgbe_mbx_info mbx;
- u16 device_id;
- u16 vendor_id;
- u16 subsystem_device_id;
- u16 subsystem_vendor_id;
- u8 revision_id;
- bool adapter_stopped;
- bool force_full_reset;
- bool allow_unsupported_sfp;
-};
-
-#define ixgbe_call_func(hw, func, params, error) \
- (func != NULL) ? func params : error
-
-
-/* Error Codes */
-#define IXGBE_ERR_EEPROM -1
-#define IXGBE_ERR_EEPROM_CHECKSUM -2
-#define IXGBE_ERR_PHY -3
-#define IXGBE_ERR_CONFIG -4
-#define IXGBE_ERR_PARAM -5
-#define IXGBE_ERR_MAC_TYPE -6
-#define IXGBE_ERR_UNKNOWN_PHY -7
-#define IXGBE_ERR_LINK_SETUP -8
-#define IXGBE_ERR_ADAPTER_STOPPED -9
-#define IXGBE_ERR_INVALID_MAC_ADDR -10
-#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11
-#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12
-#define IXGBE_ERR_INVALID_LINK_SETTINGS -13
-#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14
-#define IXGBE_ERR_RESET_FAILED -15
-#define IXGBE_ERR_SWFW_SYNC -16
-#define IXGBE_ERR_PHY_ADDR_INVALID -17
-#define IXGBE_ERR_I2C -18
-#define IXGBE_ERR_SFP_NOT_SUPPORTED -19
-#define IXGBE_ERR_SFP_NOT_PRESENT -20
-#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21
-#define IXGBE_ERR_NO_SAN_ADDR_PTR -22
-#define IXGBE_ERR_FDIR_REINIT_FAILED -23
-#define IXGBE_ERR_EEPROM_VERSION -24
-#define IXGBE_ERR_NO_SPACE -25
-#define IXGBE_ERR_OVERTEMP -26
-#define IXGBE_ERR_FC_NOT_NEGOTIATED -27
-#define IXGBE_ERR_FC_NOT_SUPPORTED -28
-#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30
-#define IXGBE_ERR_PBA_SECTION -31
-#define IXGBE_ERR_INVALID_ARGUMENT -32
-#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33
-#define IXGBE_ERR_OUT_OF_MEM -34
-
-#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF
-
-#define UNREFERENCED_XPARAMETER
-
-#endif /* _IXGBE_TYPE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
deleted file mode 100644
index 07b219a1..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
+++ /dev/null
@@ -1,922 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe_x540.h"
-#include "ixgbe_type.h"
-#include "ixgbe_api.h"
-#include "ixgbe_common.h"
-#include "ixgbe_phy.h"
-
-static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw);
-static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
-static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
-static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw);
-
-/**
- * ixgbe_init_ops_X540 - Inits func ptrs and MAC type
- * @hw: pointer to hardware structure
- *
- * Initialize the function pointers and assign the MAC type for X540.
- * Does not touch the hardware.
- **/
-s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw)
-{
- struct ixgbe_mac_info *mac = &hw->mac;
- struct ixgbe_phy_info *phy = &hw->phy;
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- s32 ret_val;
-
- ret_val = ixgbe_init_phy_ops_generic(hw);
- ret_val = ixgbe_init_ops_generic(hw);
-
-
- /* EEPROM */
- eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540;
- eeprom->ops.read = &ixgbe_read_eerd_X540;
- eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540;
- eeprom->ops.write = &ixgbe_write_eewr_X540;
- eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540;
- eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540;
- eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540;
- eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540;
-
- /* PHY */
- phy->ops.init = &ixgbe_init_phy_ops_generic;
- phy->ops.reset = NULL;
-
- /* MAC */
- mac->ops.reset_hw = &ixgbe_reset_hw_X540;
- mac->ops.get_media_type = &ixgbe_get_media_type_X540;
- mac->ops.get_supported_physical_layer =
- &ixgbe_get_supported_physical_layer_X540;
- mac->ops.read_analog_reg8 = NULL;
- mac->ops.write_analog_reg8 = NULL;
- mac->ops.start_hw = &ixgbe_start_hw_X540;
- mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
- mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
- mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
- mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
- mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
- mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540;
- mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540;
- mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
- mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
-
- /* RAR, Multicast, VLAN */
- mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
- mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
- mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
- mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
- mac->rar_highwater = 1;
- mac->ops.set_vfta = &ixgbe_set_vfta_generic;
- mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
- mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
- mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
- mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
- mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
-
- /* Link */
- mac->ops.get_link_capabilities =
- &ixgbe_get_copper_link_capabilities_generic;
- mac->ops.setup_link = &ixgbe_setup_mac_link_X540;
- mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
- mac->ops.check_link = &ixgbe_check_mac_link_generic;
-
- mac->mcft_size = 128;
- mac->vft_size = 128;
- mac->num_rar_entries = 128;
- mac->rx_pb_size = 384;
- mac->max_tx_queues = 128;
- mac->max_rx_queues = 128;
- mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
-
- /*
- * FWSM register
- * ARC supported; valid only if manageability features are
- * enabled.
- */
- mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
- IXGBE_FWSM_MODE_MASK) ? true : false;
-
- //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
-
- /* LEDs */
- mac->ops.blink_led_start = ixgbe_blink_led_start_X540;
- mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540;
-
- /* Manageability interface */
- mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
-
- return ret_val;
-}
-
-/**
- * ixgbe_get_link_capabilities_X540 - Determines link capabilities
- * @hw: pointer to hardware structure
- * @speed: pointer to link speed
- * @autoneg: true when autoneg or autotry is enabled
- *
- * Determines the link capabilities by reading the AUTOC register.
- **/
-s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed,
- bool *autoneg)
-{
- ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg);
-
- return 0;
-}
-
-/**
- * ixgbe_get_media_type_X540 - Get media type
- * @hw: pointer to hardware structure
- *
- * Returns the media type (fiber, copper, backplane)
- **/
-enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
-{
- return ixgbe_media_type_copper;
-}
-
-/**
- * ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities
- * @hw: pointer to hardware structure
- * @speed: new link speed
- * @autoneg: true if autonegotiation enabled
- * @autoneg_wait_to_complete: true when waiting for completion is needed
- **/
-s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw,
- ixgbe_link_speed speed, bool autoneg,
- bool autoneg_wait_to_complete)
-{
- return hw->phy.ops.setup_link_speed(hw, speed, autoneg,
- autoneg_wait_to_complete);
-}
-
-/**
- * ixgbe_reset_hw_X540 - Perform hardware reset
- * @hw: pointer to hardware structure
- *
- * Resets the hardware by resetting the transmit and receive units, masks
- * and clears all interrupts, and performs a reset.
- **/
-s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
-{
- s32 status = 0;
-
- /*
- * Userland DPDK takes ownership of the device.
- * The kernel driver here is used only as a simple path for ethtool,
- * so it will not actually reset the device.
- */
-#if 0
- u32 ctrl, i;
-
- /* Call adapter stop to disable tx/rx and clear interrupts */
- status = hw->mac.ops.stop_adapter(hw);
- if (status != 0)
- goto reset_hw_out;
-
- /* flush pending Tx transactions */
- ixgbe_clear_tx_pending(hw);
-
-mac_reset_top:
- ctrl = IXGBE_CTRL_RST;
- ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
- IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
- IXGBE_WRITE_FLUSH(hw);
-
- /* Poll for reset bit to self-clear indicating reset is complete */
- for (i = 0; i < 10; i++) {
- udelay(1);
- ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
- if (!(ctrl & IXGBE_CTRL_RST_MASK))
- break;
- }
-
- if (ctrl & IXGBE_CTRL_RST_MASK) {
- status = IXGBE_ERR_RESET_FAILED;
- hw_dbg(hw, "Reset polling failed to complete.\n");
- }
- msleep(100);
-
- /*
- * Double resets are required for recovery from certain error
- * conditions. Between resets, it is necessary to stall to allow time
- * for any pending HW events to complete.
- */
- if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
- hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
- goto mac_reset_top;
- }
-
- /* Set the Rx packet buffer size. */
- IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT);
-
-#endif
-
- /* Store the permanent mac address */
- hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
-
- /*
- * Store MAC address from RAR0, clear receive address registers, and
- * clear the multicast table. Also reset num_rar_entries to 128,
- * since we modify this value when programming the SAN MAC address.
- */
- hw->mac.num_rar_entries = 128;
- hw->mac.ops.init_rx_addrs(hw);
-
- /* Store the permanent SAN mac address */
- hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
-
- /* Add the SAN MAC address to the RAR only if it's a valid address */
- if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
- hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
- hw->mac.san_addr, 0, IXGBE_RAH_AV);
-
- /* Save the SAN MAC RAR index */
- hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
-
- /* Reserve the last RAR for the SAN MAC address */
- hw->mac.num_rar_entries--;
- }
-
- /* Store the alternative WWNN/WWPN prefix */
- hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
- &hw->mac.wwpn_prefix);
-
-//reset_hw_out:
- return status;
-}
-
-/**
- * ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx
- * @hw: pointer to hardware structure
- *
- * Starts the hardware using the generic start_hw function
- * and the generation 2 start_hw function.
- * Then performs revision-specific operations, if any.
- **/
-s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
-{
- s32 ret_val = 0;
-
- ret_val = ixgbe_start_hw_generic(hw);
- if (ret_val != 0)
- goto out;
-
- ret_val = ixgbe_start_hw_gen2(hw);
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_get_supported_physical_layer_X540 - Returns physical layer type
- * @hw: pointer to hardware structure
- *
- * Determines physical layer capabilities of the current configuration.
- **/
-u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw)
-{
- u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
- u16 ext_ability = 0;
-
- hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
- IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
- if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
- if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
- physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
-
- return physical_layer;
-}
-
-/**
- * ixgbe_init_eeprom_params_X540 - Initialize EEPROM params
- * @hw: pointer to hardware structure
- *
- * Initializes the EEPROM parameters ixgbe_eeprom_info within the
- * ixgbe_hw struct in order to set up EEPROM access.
- **/
-s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
-{
- struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
- u32 eec;
- u16 eeprom_size;
-
- if (eeprom->type == ixgbe_eeprom_uninitialized) {
- eeprom->semaphore_delay = 10;
- eeprom->type = ixgbe_flash;
-
- eec = IXGBE_READ_REG(hw, IXGBE_EEC);
- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
- IXGBE_EEC_SIZE_SHIFT);
- eeprom->word_size = 1 << (eeprom_size +
- IXGBE_EEPROM_WORD_SIZE_SHIFT);
-
- hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
- eeprom->type, eeprom->word_size);
- }
-
- return 0;
-}
-
-/**
- * ixgbe_read_eerd_X540 - Read EEPROM word using EERD
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to read
- * @data: word read from the EEPROM
- *
- * Reads a 16 bit word from the EEPROM using the EERD register.
- **/
-s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
-{
- s32 status = 0;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0)
- status = ixgbe_read_eerd_generic(hw, offset, data);
- else
- status = IXGBE_ERR_SWFW_SYNC;
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
- return status;
-}
-
-/**
- * ixgbe_read_eerd_buffer_X540 - Read EEPROM word(s) using EERD
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to read
- * @words: number of words
- * @data: word(s) read from the EEPROM
- *
- * Reads a 16 bit word(s) from the EEPROM using the EERD register.
- **/
-s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
- u16 offset, u16 words, u16 *data)
-{
- s32 status = 0;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0)
- status = ixgbe_read_eerd_buffer_generic(hw, offset,
- words, data);
- else
- status = IXGBE_ERR_SWFW_SYNC;
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
- return status;
-}
-
-/**
- * ixgbe_write_eewr_X540 - Write EEPROM word using EEWR
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to write
- * @data: word to write to the EEPROM
- *
- * Write a 16 bit word to the EEPROM using the EEWR register.
- **/
-s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
-{
- s32 status = 0;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0)
- status = ixgbe_write_eewr_generic(hw, offset, data);
- else
- status = IXGBE_ERR_SWFW_SYNC;
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
- return status;
-}
-
-/**
- * ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR
- * @hw: pointer to hardware structure
- * @offset: offset of word in the EEPROM to write
- * @words: number of words
- * @data: word(s) to write to the EEPROM
- *
- * Write a 16 bit word(s) to the EEPROM using the EEWR register.
- **/
-s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
- u16 offset, u16 words, u16 *data)
-{
- s32 status = 0;
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0)
- status = ixgbe_write_eewr_buffer_generic(hw, offset,
- words, data);
- else
- status = IXGBE_ERR_SWFW_SYNC;
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
- return status;
-}
-
-/**
- * ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum
- *
- * This function does not use synchronization for EERD and EEWR. It can
- * be used internally by functions which utilize ixgbe_acquire_swfw_sync_X540.
- *
- * @hw: pointer to hardware structure
- **/
-u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
-{
- u16 i;
- u16 j;
- u16 checksum = 0;
- u16 length = 0;
- u16 pointer = 0;
- u16 word = 0;
-
- /*
- * Do not use hw->eeprom.ops.read because we do not want to take
- * the synchronization semaphores here. Instead use
- * ixgbe_read_eerd_generic
- */
-
- /* Include 0x0-0x3F in the checksum */
- for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
- if (ixgbe_read_eerd_generic(hw, i, &word) != 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- break;
- }
- checksum += word;
- }
-
- /*
- * Include all data from pointers 0x3, 0x6-0xE. This excludes the
- * FW, PHY module, and PCIe Expansion/Option ROM pointers.
- */
- for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
- if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
- continue;
-
- if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- break;
- }
-
- /* Skip pointer section if the pointer is invalid. */
- if (pointer == 0xFFFF || pointer == 0 ||
- pointer >= hw->eeprom.word_size)
- continue;
-
- if (ixgbe_read_eerd_generic(hw, pointer, &length) !=
- 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- break;
- }
-
- /* Skip pointer section if length is invalid. */
- if (length == 0xFFFF || length == 0 ||
- (pointer + length) >= hw->eeprom.word_size)
- continue;
-
- for (j = pointer+1; j <= pointer+length; j++) {
- if (ixgbe_read_eerd_generic(hw, j, &word) !=
- 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- break;
- }
- checksum += word;
- }
- }
-
- checksum = (u16)IXGBE_EEPROM_SUM - checksum;
-
- return checksum;
-}
-
-/**
- * ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum
- * @hw: pointer to hardware structure
- * @checksum_val: calculated checksum
- *
- * Performs checksum calculation and validates the EEPROM checksum. If the
- * caller does not need checksum_val, the value can be NULL.
- **/
-s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
- u16 *checksum_val)
-{
- s32 status;
- u16 checksum;
- u16 read_checksum = 0;
-
- /*
- * Read the first word from the EEPROM. If this times out or fails, do
- * not continue or we could be in for a very long wait while every
- * EEPROM read fails
- */
- status = hw->eeprom.ops.read(hw, 0, &checksum);
-
- if (status != 0) {
- hw_dbg(hw, "EEPROM read failed\n");
- goto out;
- }
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0) {
- checksum = hw->eeprom.ops.calc_checksum(hw);
-
- /*
- * Do not use hw->eeprom.ops.read because we do not want to take
- * the synchronization semaphores twice here.
- */
- ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM,
- &read_checksum);
-
- /*
- * Verify read checksum from EEPROM is the same as
- * calculated checksum
- */
- if (read_checksum != checksum)
- status = IXGBE_ERR_EEPROM_CHECKSUM;
-
- /* If the user cares, return the calculated checksum */
- if (checksum_val)
- *checksum_val = checksum;
- } else {
- status = IXGBE_ERR_SWFW_SYNC;
- }
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
-out:
- return status;
-}
-
-/**
- * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash
- * @hw: pointer to hardware structure
- *
- * After writing the EEPROM to shadow RAM using the EEWR register, software
- * calculates the checksum, updates the EEPROM, and instructs the hardware
- * to update the flash.
- **/
-s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
-{
- s32 status;
- u16 checksum;
-
- /*
- * Read the first word from the EEPROM. If this times out or fails, do
- * not continue or we could be in for a very long wait while every
- * EEPROM read fails
- */
- status = hw->eeprom.ops.read(hw, 0, &checksum);
-
- if (status != 0)
- hw_dbg(hw, "EEPROM read failed\n");
-
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
- 0) {
- checksum = hw->eeprom.ops.calc_checksum(hw);
-
- /*
- * Do not use hw->eeprom.ops.write because we do not want to
- * take the synchronization semaphores twice here.
- */
- status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM,
- checksum);
-
- if (status == 0)
- status = ixgbe_update_flash_X540(hw);
- else
- status = IXGBE_ERR_SWFW_SYNC;
- }
-
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
-
- return status;
-}
-
-/**
- * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device
- * @hw: pointer to hardware structure
- *
- * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy
- * EEPROM from shadow RAM to the flash device.
- **/
-static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
-{
- u32 flup;
- s32 status = IXGBE_ERR_EEPROM;
-
- status = ixgbe_poll_flash_update_done_X540(hw);
- if (status == IXGBE_ERR_EEPROM) {
- hw_dbg(hw, "Flash update time out\n");
- goto out;
- }
-
- flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
-
- status = ixgbe_poll_flash_update_done_X540(hw);
- if (status == 0)
- hw_dbg(hw, "Flash update complete\n");
- else
- hw_dbg(hw, "Flash update time out\n");
-
- if (hw->revision_id == 0) {
- flup = IXGBE_READ_REG(hw, IXGBE_EEC);
-
- if (flup & IXGBE_EEC_SEC1VAL) {
- flup |= IXGBE_EEC_FLUP;
- IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
- }
-
- status = ixgbe_poll_flash_update_done_X540(hw);
- if (status == 0)
- hw_dbg(hw, "Flash update complete\n");
- else
- hw_dbg(hw, "Flash update time out\n");
- }
-out:
- return status;
-}
-
-/**
- * ixgbe_poll_flash_update_done_X540 - Poll flash update status
- * @hw: pointer to hardware structure
- *
- * Polls the FLUDONE (bit 26) of the EEC Register to determine when the
- * flash update is done.
- **/
-static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
-{
- u32 i;
- u32 reg;
- s32 status = IXGBE_ERR_EEPROM;
-
- for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) {
- reg = IXGBE_READ_REG(hw, IXGBE_EEC);
- if (reg & IXGBE_EEC_FLUDONE) {
- status = 0;
- break;
- }
- udelay(5);
- }
- return status;
-}
-
-/**
- * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to acquire
- *
- * Acquires the SWFW semaphore through the SW_FW_SYNC register for
- * the specified function (CSR, PHY0, PHY1, NVM, Flash)
- **/
-s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
-{
- u32 swfw_sync;
- u32 swmask = mask;
- u32 fwmask = mask << 5;
- u32 hwmask = 0;
- u32 timeout = 200;
- u32 i;
- s32 ret_val = 0;
-
- if (swmask == IXGBE_GSSR_EEP_SM)
- hwmask = IXGBE_GSSR_FLASH_SM;
-
- /* SW only mask doesn't have FW bit pair */
- if (swmask == IXGBE_GSSR_SW_MNG_SM)
- fwmask = 0;
-
- for (i = 0; i < timeout; i++) {
- /*
- * SW NVM semaphore bit is used for access to all
- * SW_FW_SYNC bits (not just NVM)
- */
- if (ixgbe_get_swfw_sync_semaphore(hw)) {
- ret_val = IXGBE_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
- if (!(swfw_sync & (fwmask | swmask | hwmask))) {
- swfw_sync |= swmask;
- IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
- ixgbe_release_swfw_sync_semaphore(hw);
- msleep(5);
- goto out;
- } else {
- /*
- * Firmware currently using resource (fwmask), hardware
- * currently using resource (hwmask), or other software
- * thread currently using resource (swmask)
- */
- ixgbe_release_swfw_sync_semaphore(hw);
- msleep(5);
- }
- }
-
- /* Failed to get SW only semaphore */
- if (swmask == IXGBE_GSSR_SW_MNG_SM) {
- ret_val = IXGBE_ERR_SWFW_SYNC;
- goto out;
- }
-
- /* If the resource is not released by the FW/HW, the SW can assume that
- * the FW/HW is malfunctioning. In that case the SW should set the SW bit(s)
- * of the requested resource(s) while ignoring the corresponding FW/HW
- * bits in the SW_FW_SYNC register.
- */
- swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
- if (swfw_sync & (fwmask | hwmask)) {
- if (ixgbe_get_swfw_sync_semaphore(hw)) {
- ret_val = IXGBE_ERR_SWFW_SYNC;
- goto out;
- }
-
- swfw_sync |= swmask;
- IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
- ixgbe_release_swfw_sync_semaphore(hw);
- msleep(5);
- }
-
-out:
- return ret_val;
-}
-
-/**
- * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore
- * @hw: pointer to hardware structure
- * @mask: Mask to specify which semaphore to release
- *
- * Releases the SWFW semaphore through the SW_FW_SYNC register
- * for the specified function (CSR, PHY0, PHY1, EVM, Flash)
- **/
-void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
-{
- u32 swfw_sync;
- u32 swmask = mask;
-
- ixgbe_get_swfw_sync_semaphore(hw);
-
- swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
- swfw_sync &= ~swmask;
- IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
-
- ixgbe_release_swfw_sync_semaphore(hw);
- msleep(5);
-}
-
-/**
- * ixgbe_get_swfw_sync_semaphore - Get hardware semaphore
- * @hw: pointer to hardware structure
- *
- * Sets the hardware semaphores so SW/FW can gain control of shared resources
- **/
-static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
-{
- s32 status = IXGBE_ERR_EEPROM;
- u32 timeout = 2000;
- u32 i;
- u32 swsm;
-
- /* Get SMBI software semaphore between device drivers first */
- for (i = 0; i < timeout; i++) {
- /*
- * If the SMBI bit is 0 when we read it, then the bit will be
- * set and we have the semaphore
- */
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- if (!(swsm & IXGBE_SWSM_SMBI)) {
- status = 0;
- break;
- }
- udelay(50);
- }
-
- /* Now get the semaphore between SW/FW through the REGSMP bit */
- if (status == 0) {
- for (i = 0; i < timeout; i++) {
- swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
- if (!(swsm & IXGBE_SWFW_REGSMP))
- break;
-
- udelay(50);
- }
-
- /*
- * Release semaphores and return error if SW NVM semaphore
- * was not granted because we don't have access to the EEPROM
- */
- if (i >= timeout) {
- hw_dbg(hw, "REGSMP Software NVM semaphore not "
- "granted.\n");
- ixgbe_release_swfw_sync_semaphore(hw);
- status = IXGBE_ERR_EEPROM;
- }
- } else {
- hw_dbg(hw, "Software semaphore SMBI between device drivers "
- "not granted.\n");
- }
-
- return status;
-}
-
-/**
- * ixgbe_release_swfw_sync_semaphore - Release hardware semaphore
- * @hw: pointer to hardware structure
- *
- * This function clears hardware semaphore bits.
- **/
-static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
-{
- u32 swsm;
-
- /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */
-
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- swsm &= ~IXGBE_SWSM_SMBI;
- IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
-
- swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
- swsm &= ~IXGBE_SWFW_REGSMP;
- IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm);
-
- IXGBE_WRITE_FLUSH(hw);
-}
-
-/**
- * ixgbe_blink_led_start_X540 - Blink LED based on index.
- * @hw: pointer to hardware structure
- * @index: led number to blink
- *
- * Devices that implement the version 2 interface:
- * X540
- **/
-s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
-{
- u32 macc_reg;
- u32 ledctl_reg;
- ixgbe_link_speed speed;
- bool link_up;
-
- /*
- * Link should be up in order for the blink bit in the LED control
- * register to work. Force link and speed in the MAC if link is down.
- * This will be reversed when we stop the blinking.
- */
- hw->mac.ops.check_link(hw, &speed, &link_up, false);
- if (link_up == false) {
- macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
- macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS;
- IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
- }
- /* Set the LED to LINK_UP + BLINK. */
- ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
- ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
- ledctl_reg |= IXGBE_LED_BLINK(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
-
-/**
- * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index.
- * @hw: pointer to hardware structure
- * @index: led number to stop blinking
- *
- * Devices that implement the version 2 interface:
- * X540
- **/
-s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
-{
- u32 macc_reg;
- u32 ledctl_reg;
-
- /* Restore the LED to its default value. */
- ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
- ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
- ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
- ledctl_reg &= ~IXGBE_LED_BLINK(index);
- IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
-
- /* Unforce link and speed in the MAC. */
- macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
- macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS);
- IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
- IXGBE_WRITE_FLUSH(hw);
-
- return 0;
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
deleted file mode 100644
index 96020911..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _IXGBE_X540_H_
-#define _IXGBE_X540_H_
-
-#include "ixgbe_type.h"
-
-s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
- ixgbe_link_speed *speed, bool *autoneg);
-enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw);
-s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
- bool autoneg, bool link_up_wait_to_complete);
-s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
-s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw);
-u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw);
-
-s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
-s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data);
-s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
- u16 *data);
-s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data);
-s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
- u16 *data);
-s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw);
-s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val);
-u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw);
-
-s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
-void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
-
-s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
-s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
-#endif /* _IXGBE_X540_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
deleted file mode 100644
index 6c994576..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
+++ /dev/null
@@ -1,1231 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "ixgbe.h"
-#include "kcompat.h"
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-/* From lib/vsprintf.c */
-#include <asm/div64.h>
-
-static int skip_atoi(const char **s)
-{
- int i=0;
-
- while (isdigit(**s))
- i = i*10 + *((*s)++) - '0';
- return i;
-}
-
-#define _kc_ZEROPAD 1 /* pad with zero */
-#define _kc_SIGN 2 /* unsigned/signed long */
-#define _kc_PLUS 4 /* show plus */
-#define _kc_SPACE 8 /* space if plus */
-#define _kc_LEFT 16 /* left justified */
-#define _kc_SPECIAL 32 /* 0x */
-#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
-
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
-{
- char c,sign,tmp[66];
- const char *digits;
- const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
- const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
- int i;
-
- digits = (type & _kc_LARGE) ? large_digits : small_digits;
- if (type & _kc_LEFT)
- type &= ~_kc_ZEROPAD;
- if (base < 2 || base > 36)
- return 0;
- c = (type & _kc_ZEROPAD) ? '0' : ' ';
- sign = 0;
- if (type & _kc_SIGN) {
- if (num < 0) {
- sign = '-';
- num = -num;
- size--;
- } else if (type & _kc_PLUS) {
- sign = '+';
- size--;
- } else if (type & _kc_SPACE) {
- sign = ' ';
- size--;
- }
- }
- if (type & _kc_SPECIAL) {
- if (base == 16)
- size -= 2;
- else if (base == 8)
- size--;
- }
- i = 0;
- if (num == 0)
- tmp[i++]='0';
- else while (num != 0)
- tmp[i++] = digits[do_div(num,base)];
- if (i > precision)
- precision = i;
- size -= precision;
- if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
- while(size-->0) {
- if (buf <= end)
- *buf = ' ';
- ++buf;
- }
- }
- if (sign) {
- if (buf <= end)
- *buf = sign;
- ++buf;
- }
- if (type & _kc_SPECIAL) {
- if (base==8) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- } else if (base==16) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- if (buf <= end)
- *buf = digits[33];
- ++buf;
- }
- }
- if (!(type & _kc_LEFT)) {
- while (size-- > 0) {
- if (buf <= end)
- *buf = c;
- ++buf;
- }
- }
- while (i < precision--) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- }
- while (i-- > 0) {
- if (buf <= end)
- *buf = tmp[i];
- ++buf;
- }
- while (size-- > 0) {
- if (buf <= end)
- *buf = ' ';
- ++buf;
- }
- return buf;
-}
-
-int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
-{
- int len;
- unsigned long long num;
- int i, base;
- char *str, *end, c;
- const char *s;
-
- int flags; /* flags to number() */
-
- int field_width; /* width of output field */
- int precision; /* min. # of digits for integers; max
- number of chars from string */
- int qualifier; /* 'h', 'l', or 'L' for integer fields */
- /* 'z' support added 23/7/1999 S.H. */
- /* 'z' changed to 'Z' --davidm 1/25/99 */
-
- str = buf;
- end = buf + size - 1;
-
- if (end < buf - 1) {
- end = ((void *) -1);
- size = end - buf + 1;
- }
-
- for (; *fmt ; ++fmt) {
- if (*fmt != '%') {
- if (str <= end)
- *str = *fmt;
- ++str;
- continue;
- }
-
- /* process flags */
- flags = 0;
- repeat:
- ++fmt; /* this also skips first '%' */
- switch (*fmt) {
- case '-': flags |= _kc_LEFT; goto repeat;
- case '+': flags |= _kc_PLUS; goto repeat;
- case ' ': flags |= _kc_SPACE; goto repeat;
- case '#': flags |= _kc_SPECIAL; goto repeat;
- case '0': flags |= _kc_ZEROPAD; goto repeat;
- }
-
- /* get field width */
- field_width = -1;
- if (isdigit(*fmt))
- field_width = skip_atoi(&fmt);
- else if (*fmt == '*') {
- ++fmt;
- /* it's the next argument */
- field_width = va_arg(args, int);
- if (field_width < 0) {
- field_width = -field_width;
- flags |= _kc_LEFT;
- }
- }
-
- /* get the precision */
- precision = -1;
- if (*fmt == '.') {
- ++fmt;
- if (isdigit(*fmt))
- precision = skip_atoi(&fmt);
- else if (*fmt == '*') {
- ++fmt;
- /* it's the next argument */
- precision = va_arg(args, int);
- }
- if (precision < 0)
- precision = 0;
- }
-
- /* get the conversion qualifier */
- qualifier = -1;
- if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
- qualifier = *fmt;
- ++fmt;
- }
-
- /* default base */
- base = 10;
-
- switch (*fmt) {
- case 'c':
- if (!(flags & _kc_LEFT)) {
- while (--field_width > 0) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- }
- c = (unsigned char) va_arg(args, int);
- if (str <= end)
- *str = c;
- ++str;
- while (--field_width > 0) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- continue;
-
- case 's':
- s = va_arg(args, char *);
- if (!s)
- s = "<NULL>";
-
- len = strnlen(s, precision);
-
- if (!(flags & _kc_LEFT)) {
- while (len < field_width--) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- }
- for (i = 0; i < len; ++i) {
- if (str <= end)
- *str = *s;
- ++str; ++s;
- }
- while (len < field_width--) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- continue;
-
- case 'p':
- if (field_width == -1) {
- field_width = 2*sizeof(void *);
- flags |= _kc_ZEROPAD;
- }
- str = number(str, end,
- (unsigned long) va_arg(args, void *),
- 16, field_width, precision, flags);
- continue;
-
-
- case 'n':
- /* FIXME:
- * What does C99 say about the overflow case here? */
- if (qualifier == 'l') {
- long * ip = va_arg(args, long *);
- *ip = (str - buf);
- } else if (qualifier == 'Z') {
- size_t * ip = va_arg(args, size_t *);
- *ip = (str - buf);
- } else {
- int * ip = va_arg(args, int *);
- *ip = (str - buf);
- }
- continue;
-
- case '%':
- if (str <= end)
- *str = '%';
- ++str;
- continue;
-
- /* integer number formats - set up the flags and "break" */
- case 'o':
- base = 8;
- break;
-
- case 'X':
- flags |= _kc_LARGE;
- case 'x':
- base = 16;
- break;
-
- case 'd':
- case 'i':
- flags |= _kc_SIGN;
- case 'u':
- break;
-
- default:
- if (str <= end)
- *str = '%';
- ++str;
- if (*fmt) {
- if (str <= end)
- *str = *fmt;
- ++str;
- } else {
- --fmt;
- }
- continue;
- }
- if (qualifier == 'L')
- num = va_arg(args, long long);
- else if (qualifier == 'l') {
- num = va_arg(args, unsigned long);
- if (flags & _kc_SIGN)
- num = (signed long) num;
- } else if (qualifier == 'Z') {
- num = va_arg(args, size_t);
- } else if (qualifier == 'h') {
- num = (unsigned short) va_arg(args, int);
- if (flags & _kc_SIGN)
- num = (signed short) num;
- } else {
- num = va_arg(args, unsigned int);
- if (flags & _kc_SIGN)
- num = (signed int) num;
- }
- str = number(str, end, num, base,
- field_width, precision, flags);
- }
- if (str <= end)
- *str = '\0';
- else if (size > 0)
- /* don't write out a null byte if the buf size is zero */
- *end = '\0';
- /* the trailing null byte doesn't count towards the total
- * ++str;
- */
- return str-buf;
-}
-
-int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
-{
- va_list args;
- int i;
-
- va_start(args, fmt);
- i = _kc_vsnprintf(buf,size,fmt,args);
- va_end(args);
- return i;
-}
-#endif /* < 2.4.8 */
-
-
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
- unsigned int vfs_assigned = 0;
-#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
- int pos;
- struct pci_dev *vfdev;
- unsigned short dev_id;
-
- /* only search if we are a PF */
- if (!dev->is_physfn)
- return 0;
-
- /* find SR-IOV capability */
- pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
- if (!pos)
- return 0;
-
- /*
- * determine the device ID for the VFs, the vendor ID will be the
- * same as the PF so there is no need to check for that one
- */
- pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
-
- /* loop through all the VFs to see if we own any that are assigned */
- vfdev = pci_get_device(dev->vendor, dev_id, NULL);
- while (vfdev) {
- /*
- * It is considered assigned if it is a virtual function with
- * our dev as the physical function and the assigned bit is set
- */
- if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
- (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
- vfs_assigned++;
-
- vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
- }
-
-#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
- return vfs_assigned;
-}
-
-#endif /* CONFIG_PCI_IOV */
-#endif /* 3.10.0 */
-
-
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#if defined(CONFIG_HIGHMEM)
-
-#ifndef PCI_DRAM_OFFSET
-#define PCI_DRAM_OFFSET 0
-#endif
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
- size_t size, int direction)
-{
- return ((u64) (page - mem_map) << PAGE_SHIFT) + offset +
- PCI_DRAM_OFFSET;
-}
-
-#else /* CONFIG_HIGHMEM */
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
- size_t size, int direction)
-{
- return pci_map_single(dev, (void *)page_address(page) + offset, size,
- direction);
-}
-
-#endif /* CONFIG_HIGHMEM */
-
-void
-_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
- int direction)
-{
- return pci_unmap_single(dev, dma_addr, size, direction);
-}
-
-#endif /* 2.4.13 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-int
-_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
-{
- if (!pci_dma_supported(dev, mask))
- return -EIO;
- dev->dma_mask = mask;
- return 0;
-}
-
-int
-_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
-{
- int i;
-
- for (i = 0; i < 6; i++) {
- if (pci_resource_len(dev, i) == 0)
- continue;
-
- if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
- if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
- pci_release_regions(dev);
- return -EBUSY;
- }
- } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
- if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
- pci_release_regions(dev);
- return -EBUSY;
- }
- }
- }
- return 0;
-}
-
-void
-_kc_pci_release_regions(struct pci_dev *dev)
-{
- int i;
-
- for (i = 0; i < 6; i++) {
- if (pci_resource_len(dev, i) == 0)
- continue;
-
- if (pci_resource_flags(dev, i) & IORESOURCE_IO)
- release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-
- else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
- release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
- }
-}
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-struct net_device *
-_kc_alloc_etherdev(int sizeof_priv)
-{
- struct net_device *dev;
- int alloc_size;
-
- alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
- dev = kzalloc(alloc_size, GFP_KERNEL);
- if (!dev)
- return NULL;
-
- if (sizeof_priv)
- dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
- dev->name[0] = '\0';
- ether_setup(dev);
-
- return dev;
-}
-
-int
-_kc_is_valid_ether_addr(u8 *addr)
-{
- const char zaddr[6] = { 0, };
-
- return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
-}
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-int
-_kc_pci_set_power_state(struct pci_dev *dev, int state)
-{
- return 0;
-}
-
-int
-_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
-{
- return 0;
-}
-
-#endif /* 2.4.6 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
- int off, int size)
-{
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
- frag->page_offset = off;
- frag->size = size;
- skb_shinfo(skb)->nr_frags = i + 1;
-}
-
-/*
- * Original Copyright:
- * find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p = addr + BITOP_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG-1);
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset %= BITS_PER_LONG;
- if (offset) {
- tmp = *(p++);
- tmp &= (~0UL << offset);
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- }
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found_middle;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
- tmp = *p;
-
-found_first:
- tmp &= (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + ffs(tmp);
-}
-
-size_t _kc_strlcpy(char *dest, const char *src, size_t size)
-{
- size_t ret = strlen(src);
-
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
- return ret;
-}
-
-#endif /* 2.6.0 => 2.4.6 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
-{
- va_list args;
- int i;
-
- va_start(args, fmt);
- i = vsnprintf(buf, size, fmt, args);
- va_end(args);
- return (i >= size) ? (size - 1) : i;
-}
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-char *_kc_kstrdup(const char *s, unsigned int gfp)
-{
- size_t len;
- char *buf;
-
- if (!s)
- return NULL;
-
- len = strlen(s) + 1;
- buf = kmalloc(len, gfp);
- if (buf)
- memcpy(buf, s, len);
- return buf;
-}
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-void *_kc_kzalloc(size_t size, int flags)
-{
- void *ret = kmalloc(size, flags);
- if (ret)
- memset(ret, 0, size);
- return ret;
-}
-#endif /* <= 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-int _kc_skb_pad(struct sk_buff *skb, int pad)
-{
- int ntail;
-
- /* If the skbuff is non-linear, tailroom is always zero. */
- if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
- memset(skb->data+skb->len, 0, pad);
- return 0;
- }
-
- ntail = skb->data_len + pad - (skb->end - skb->tail);
- if (likely(skb_cloned(skb) || ntail > 0)) {
- if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC))
- goto free_skb;
- }
-
-#ifdef MAX_SKB_FRAGS
- if (skb_is_nonlinear(skb) &&
- !__pskb_pull_tail(skb, skb->data_len))
- goto free_skb;
-
-#endif
- memset(skb->data + skb->len, 0, pad);
- return 0;
-
-free_skb:
- kfree_skb(skb);
- return -ENOMEM;
-}
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-int _kc_pci_save_state(struct pci_dev *pdev)
-{
- struct adapter_struct *adapter = pci_get_drvdata(pdev);
- int size = PCI_CONFIG_SPACE_LEN, i;
- u16 pcie_cap_offset, pcie_link_status;
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
- /* no ->dev for 2.4 kernels */
- WARN_ON(pdev->dev.driver_data == NULL);
-#endif
- pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_cap_offset) {
- if (!pci_read_config_word(pdev,
- pcie_cap_offset + PCIE_LINK_STATUS,
- &pcie_link_status))
- size = PCIE_CONFIG_SPACE_LEN;
- }
- pci_config_space_ich8lan();
-#ifdef HAVE_PCI_ERS
- if (adapter->config_space == NULL)
-#else
- WARN_ON(adapter->config_space != NULL);
-#endif
- adapter->config_space = kmalloc(size, GFP_KERNEL);
- if (!adapter->config_space) {
- printk(KERN_ERR "Out of memory in pci_save_state\n");
- return -ENOMEM;
- }
- for (i = 0; i < (size / 4); i++)
- pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
- return 0;
-}
-
-void _kc_pci_restore_state(struct pci_dev *pdev)
-{
- struct adapter_struct *adapter = pci_get_drvdata(pdev);
- int size = PCI_CONFIG_SPACE_LEN, i;
- u16 pcie_cap_offset;
- u16 pcie_link_status;
-
- if (adapter->config_space != NULL) {
- pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_cap_offset &&
- !pci_read_config_word(pdev,
- pcie_cap_offset + PCIE_LINK_STATUS,
- &pcie_link_status))
- size = PCIE_CONFIG_SPACE_LEN;
-
- pci_config_space_ich8lan();
- for (i = 0; i < (size / 4); i++)
- pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
-#ifndef HAVE_PCI_ERS
- kfree(adapter->config_space);
- adapter->config_space = NULL;
-#endif
- }
-}
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-void _kc_free_netdev(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
-
- if (adapter->config_space != NULL)
- kfree(adapter->config_space);
-#ifdef CONFIG_SYSFS
- if (netdev->reg_state == NETREG_UNINITIALIZED) {
- kfree((char *)netdev - netdev->padded);
- } else {
- BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
- netdev->reg_state = NETREG_RELEASED;
- class_device_put(&netdev->class_dev);
- }
-#else
- kfree((char *)netdev - netdev->padded);
-#endif
-}
-#endif
-
-void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
-{
- void *p;
-
- p = kzalloc(len, gfp);
- if (p)
- memcpy(p, src, len);
- return p;
-}
-#endif /* <= 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-/* hexdump code taken from lib/hexdump.c */
-static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
- int groupsize, unsigned char *linebuf,
- size_t linebuflen, bool ascii)
-{
- const u8 *ptr = buf;
- u8 ch;
- int j, lx = 0;
- int ascii_column;
-
- if (rowsize != 16 && rowsize != 32)
- rowsize = 16;
-
- if (!len)
- goto nil;
- if (len > rowsize) /* limit to one line at a time */
- len = rowsize;
- if ((len % groupsize) != 0) /* no mixed size output */
- groupsize = 1;
-
- switch (groupsize) {
- case 8: {
- const u64 *ptr8 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%16.16llx", j ? " " : "",
- (unsigned long long)*(ptr8 + j));
- ascii_column = 17 * ngroups + 2;
- break;
- }
-
- case 4: {
- const u32 *ptr4 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%8.8x", j ? " " : "", *(ptr4 + j));
- ascii_column = 9 * ngroups + 2;
- break;
- }
-
- case 2: {
- const u16 *ptr2 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%4.4x", j ? " " : "", *(ptr2 + j));
- ascii_column = 5 * ngroups + 2;
- break;
- }
-
- default:
- for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
- ch = ptr[j];
- linebuf[lx++] = hex_asc(ch >> 4);
- linebuf[lx++] = hex_asc(ch & 0x0f);
- linebuf[lx++] = ' ';
- }
- if (j)
- lx--;
-
- ascii_column = 3 * rowsize + 2;
- break;
- }
- if (!ascii)
- goto nil;
-
- while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
- linebuf[lx++] = ' ';
- for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
- linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
- : '.';
-nil:
- linebuf[lx++] = '\0';
-}
-
-void _kc_print_hex_dump(const char *level,
- const char *prefix_str, int prefix_type,
- int rowsize, int groupsize,
- const void *buf, size_t len, bool ascii)
-{
- const u8 *ptr = buf;
- int i, linelen, remaining = len;
- unsigned char linebuf[200];
-
- if (rowsize != 16 && rowsize != 32)
- rowsize = 16;
-
- for (i = 0; i < len; i += rowsize) {
- linelen = min(remaining, rowsize);
- remaining -= rowsize;
- _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
- linebuf, sizeof(linebuf), ascii);
-
- switch (prefix_type) {
- case DUMP_PREFIX_ADDRESS:
- printk("%s%s%*p: %s\n", level, prefix_str,
- (int)(2 * sizeof(void *)), ptr + i, linebuf);
- break;
- case DUMP_PREFIX_OFFSET:
- printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
- break;
- default:
- printk("%s%s%s\n", level, prefix_str, linebuf);
- break;
- }
- }
-}
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
-int ixgbe_dcb_netlink_register(void)
-{
- return 0;
-}
-
-int ixgbe_dcb_netlink_unregister(void)
-{
- return 0;
-}
-
-int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max)
-{
- return 0;
-}
-#endif /* < 2.6.23 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifdef NAPI
-struct net_device *napi_to_poll_dev(struct napi_struct *napi)
-{
- struct adapter_q_vector *q_vector = container_of(napi,
- struct adapter_q_vector,
- napi);
- return &q_vector->poll_dev;
-}
-
-int __kc_adapter_clean(struct net_device *netdev, int *budget)
-{
- int work_done;
- int work_to_do = min(*budget, netdev->quota);
- /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
- struct napi_struct *napi = netdev->priv;
- work_done = napi->poll(napi, work_to_do);
- *budget -= work_done;
- netdev->quota -= work_done;
- return (work_done >= work_to_do) ? 1 : 0;
-}
-#endif /* NAPI */
-#endif /* <= 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
-{
- struct pci_dev *parent = pdev->bus->self;
- u16 link_state;
- int pos;
-
- if (!parent)
- return;
-
- pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
- if (pos) {
- pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
- link_state &= ~state;
- pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
- }
-}
-#endif /* < 2.6.26 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-#ifdef HAVE_TX_MQ
-void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_stop_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_stop_subqueue(netdev, i);
-}
-void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_wake_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_wake_subqueue(netdev, i);
-}
-void _kc_netif_tx_start_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_start_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_start_subqueue(netdev, i);
-}
-#endif /* HAVE_TX_MQ */
-
-#ifndef __WARN_printf
-void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
-{
- va_list args;
-
- printk(KERN_WARNING "------------[ cut here ]------------\n");
- printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
- va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
-
- dump_stack();
-}
-#endif /* __WARN_printf */
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-
-int
-_kc_pci_prepare_to_sleep(struct pci_dev *dev)
-{
- pci_power_t target_state;
- int error;
-
- target_state = pci_choose_state(dev, PMSG_SUSPEND);
-
- pci_enable_wake(dev, target_state, true);
-
- error = pci_set_power_state(dev, target_state);
-
- if (error)
- pci_enable_wake(dev, target_state, false);
-
- return error;
-}
-
-int
-_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
-{
- int err;
-
- err = pci_enable_wake(dev, PCI_D3cold, enable);
- if (err)
- goto out;
-
- err = pci_enable_wake(dev, PCI_D3hot, enable);
-
-out:
- return err;
-}
-#endif /* < 2.6.28 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
- int off, int size)
-{
- skb_fill_page_desc(skb, i, page, off, size);
- skb->len += size;
- skb->data_len += size;
- skb->truesize += size;
-}
-#endif /* < 3.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
-#ifdef HAVE_NETDEV_SELECT_QUEUE
-#include <net/ip.h>
-static u32 _kc_simple_tx_hashrnd;
-static u32 _kc_simple_tx_hashrnd_initialized;
-
-u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
-{
- u32 addr1, addr2, ports;
- u32 hash, ihl;
- u8 ip_proto = 0;
-
- if (unlikely(!_kc_simple_tx_hashrnd_initialized)) {
- get_random_bytes(&_kc_simple_tx_hashrnd, 4);
- _kc_simple_tx_hashrnd_initialized = 1;
- }
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
- ip_proto = ip_hdr(skb)->protocol;
- addr1 = ip_hdr(skb)->saddr;
- addr2 = ip_hdr(skb)->daddr;
- ihl = ip_hdr(skb)->ihl;
- break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case htons(ETH_P_IPV6):
- ip_proto = ipv6_hdr(skb)->nexthdr;
- addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
- addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
- ihl = (40 >> 2);
- break;
-#endif
- default:
- return 0;
- }
-
-
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
- break;
-
- default:
- ports = 0;
- break;
- }
-
- hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd);
-
- return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
-}
-#endif /* HAVE_NETDEV_SELECT_QUEUE */
-#endif /* < 2.6.30 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-#ifdef HAVE_TX_MQ
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
-{
- unsigned int real_num = dev->real_num_tx_queues;
- struct Qdisc *qdisc;
- int i;
-
- if (unlikely(txq > dev->num_tx_queues))
- return;
-
- if (txq > real_num)
- dev->real_num_tx_queues = txq;
- else if (txq < real_num) {
- dev->real_num_tx_queues = txq;
- for (i = txq; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
- if (qdisc) {
- spin_lock_bh(qdisc_lock(qdisc));
- qdisc_reset(qdisc);
- spin_unlock_bh(qdisc_lock(qdisc));
- }
- }
- }
-}
-#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
-#endif /* HAVE_TX_MQ */
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-static const u32 _kc_flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
-
-u32 _kc_ethtool_op_get_flags(struct net_device *dev)
-{
- return dev->features & _kc_flags_dup_features;
-}
-
-int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
- if (data & ~supported)
- return -EINVAL;
-
- dev->features = ((dev->features & ~_kc_flags_dup_features) |
- (data & _kc_flags_dup_features));
- return 0;
-}
-#endif /* < 2.6.36 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
-u8 _kc_netdev_get_num_tc(struct net_device *dev)
-{
- struct adapter_struct *kc_adapter = netdev_priv(dev);
- if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED)
- return kc_adapter->tc;
- else
- return 0;
-}
-
-u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up)
-{
- struct adapter_struct *kc_adapter = netdev_priv(dev);
- int tc;
- u8 map;
-
- for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) {
- map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
-
- if (map & (1 << up))
- return tc;
- }
-
- return 0;
-}
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
-#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
deleted file mode 100644
index 7c7d6c31..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
+++ /dev/null
@@ -1,3140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#ifndef _KCOMPAT_H_
-#define _KCOMPAT_H_
-
-#ifndef LINUX_VERSION_CODE
-#include <linux/version.h>
-#else
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
-#endif
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/mii.h>
-#include <linux/vmalloc.h>
-#include <asm/io.h>
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-
-/* NAPI enable/disable flags here */
-/* enable NAPI for ixgbe by default */
-#undef CONFIG_IXGBE_NAPI
-#define CONFIG_IXGBE_NAPI
-#define NAPI
-#ifdef CONFIG_IXGBE_NAPI
-#undef NAPI
-#define NAPI
-#endif /* CONFIG_IXGBE_NAPI */
-#ifdef IXGBE_NAPI
-#undef NAPI
-#define NAPI
-#endif /* IXGBE_NAPI */
-#ifdef IXGBE_NO_NAPI
-#undef NAPI
-#endif /* IXGBE_NO_NAPI */
-
-#define adapter_struct ixgbe_adapter
-#define adapter_q_vector ixgbe_q_vector
-
-/* and finally set defines so that the code sees the changes */
-#ifdef NAPI
-#ifndef CONFIG_IXGBE_NAPI
-#define CONFIG_IXGBE_NAPI
-#endif
-#else
-#undef CONFIG_IXGBE_NAPI
-#endif /* NAPI */
-
-/* packet split disable/enable */
-#ifdef DISABLE_PACKET_SPLIT
-#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
-#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT
-#endif
-#endif /* DISABLE_PACKET_SPLIT */
-
-/* MSI compatibility code for all kernels and drivers */
-#ifdef DISABLE_PCI_MSI
-#undef CONFIG_PCI_MSI
-#endif
-#ifndef CONFIG_PCI_MSI
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
-struct msix_entry {
- u16 vector; /* kernel uses to write allocated vector */
- u16 entry; /* driver uses to specify entry, OS writes */
-};
-#endif
-#undef pci_enable_msi
-#define pci_enable_msi(a) -ENOTSUPP
-#undef pci_disable_msi
-#define pci_disable_msi(a) do {} while (0)
-#undef pci_enable_msix
-#define pci_enable_msix(a, b, c) -ENOTSUPP
-#undef pci_disable_msix
-#define pci_disable_msix(a) do {} while (0)
-#define msi_remove_pci_irq_vectors(a) do {} while (0)
-#endif /* CONFIG_PCI_MSI */
-#ifdef DISABLE_PM
-#undef CONFIG_PM
-#endif
-
-#ifdef DISABLE_NET_POLL_CONTROLLER
-#undef CONFIG_NET_POLL_CONTROLLER
-#endif
-
-#ifndef PMSG_SUSPEND
-#define PMSG_SUSPEND 3
-#endif
-
-/* generic boolean compatibility */
-#undef TRUE
-#undef FALSE
-#define TRUE true
-#define FALSE false
-#ifdef GCC_VERSION
-#if ( GCC_VERSION < 3000 )
-#define _Bool char
-#endif
-#else
-#define _Bool char
-#endif
-
-/* kernels less than 2.4.14 don't have this */
-#ifndef ETH_P_8021Q
-#define ETH_P_8021Q 0x8100
-#endif
-
-#ifndef module_param
-#define module_param(v,t,p) MODULE_PARM(v, "i");
-#endif
-
-#ifndef DMA_64BIT_MASK
-#define DMA_64BIT_MASK 0xffffffffffffffffULL
-#endif
-
-#ifndef DMA_32BIT_MASK
-#define DMA_32BIT_MASK 0x00000000ffffffffULL
-#endif
-
-#ifndef PCI_CAP_ID_EXP
-#define PCI_CAP_ID_EXP 0x10
-#endif
-
-#ifndef PCIE_LINK_STATE_L0S
-#define PCIE_LINK_STATE_L0S 1
-#endif
-#ifndef PCIE_LINK_STATE_L1
-#define PCIE_LINK_STATE_L1 2
-#endif
-
-#ifndef mmiowb
-#ifdef CONFIG_IA64
-#define mmiowb() asm volatile ("mf.a" ::: "memory")
-#else
-#define mmiowb()
-#endif
-#endif
-
-#ifndef SET_NETDEV_DEV
-#define SET_NETDEV_DEV(net, pdev)
-#endif
-
-#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
-#define free_netdev(x) kfree(x)
-#endif
-
-#ifdef HAVE_POLL_CONTROLLER
-#define CONFIG_NET_POLL_CONTROLLER
-#endif
-
-#ifndef SKB_DATAREF_SHIFT
-/* if we do not have the infrastructure to detect if skb_header is cloned
- just return false in all cases */
-#define skb_header_cloned(x) 0
-#endif
-
-#ifndef NETIF_F_GSO
-#define gso_size tso_size
-#define gso_segs tso_segs
-#endif
-
-#ifndef NETIF_F_GRO
-#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
- vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
-#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
-#endif
-
-#ifndef NETIF_F_SCTP_CSUM
-#define NETIF_F_SCTP_CSUM 0
-#endif
-
-#ifndef NETIF_F_LRO
-#define NETIF_F_LRO (1 << 15)
-#endif
-
-#ifndef NETIF_F_NTUPLE
-#define NETIF_F_NTUPLE (1 << 27)
-#endif
-
-#ifndef IPPROTO_SCTP
-#define IPPROTO_SCTP 132
-#endif
-
-#ifndef CHECKSUM_PARTIAL
-#define CHECKSUM_PARTIAL CHECKSUM_HW
-#define CHECKSUM_COMPLETE CHECKSUM_HW
-#endif
-
-#ifndef __read_mostly
-#define __read_mostly
-#endif
-
-#ifndef MII_RESV1
-#define MII_RESV1 0x17 /* Reserved... */
-#endif
-
-#ifndef unlikely
-#define unlikely(_x) _x
-#define likely(_x) _x
-#endif
-
-#ifndef WARN_ON
-#define WARN_ON(x)
-#endif
-
-#ifndef PCI_DEVICE
-#define PCI_DEVICE(vend,dev) \
- .vendor = (vend), .device = (dev), \
- .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
-#endif
-
-#ifndef node_online
-#define node_online(node) ((node) == 0)
-#endif
-
-#ifndef num_online_cpus
-#define num_online_cpus() smp_num_cpus
-#endif
-
-#ifndef cpu_online
-#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
-#endif
-
-#ifndef _LINUX_RANDOM_H
-#include <linux/random.h>
-#endif
-
-#ifndef DECLARE_BITMAP
-#ifndef BITS_TO_LONGS
-#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#endif
-#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
-#endif
-
-#ifndef VLAN_HLEN
-#define VLAN_HLEN 4
-#endif
-
-#ifndef VLAN_ETH_HLEN
-#define VLAN_ETH_HLEN 18
-#endif
-
-#ifndef VLAN_ETH_FRAME_LEN
-#define VLAN_ETH_FRAME_LEN 1518
-#endif
-
-#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
-#define dca_get_tag(b) 0
-#define dca_add_requester(a) -1
-#define dca_remove_requester(b) do { } while(0)
-#define DCA_PROVIDER_ADD 0x0001
-#define DCA_PROVIDER_REMOVE 0x0002
-#endif
-
-#ifndef DCA_GET_TAG_TWO_ARGS
-#define dca3_get_tag(a,b) dca_get_tag(b)
-#endif
-
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#if defined(__i386__) || defined(__x86_64__)
-#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#endif
-#endif
-
-/* taken from 2.6.24 definition in linux/kernel.h */
-#ifndef IS_ALIGNED
-#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0)
-#endif
-
-#ifndef NETIF_F_HW_VLAN_TX
-struct _kc_vlan_ethhdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-#define vlan_ethhdr _kc_vlan_ethhdr
-struct _kc_vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
-#define vlan_hdr _kc_vlan_hdr
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#define vlan_tx_tag_present(_skb) 0
-#define vlan_tx_tag_get(_skb) 0
-#endif
-#endif
-
-#ifndef VLAN_PRIO_SHIFT
-#define VLAN_PRIO_SHIFT 13
-#endif
-
-
-#ifndef __GFP_COLD
-#define __GFP_COLD 0
-#endif
-
-/*****************************************************************************/
-/* Installations with ethtool version without eeprom, adapter id, or statistics
- * support */
-
-#ifndef ETH_GSTRING_LEN
-#define ETH_GSTRING_LEN 32
-#endif
-
-#ifndef ETHTOOL_GSTATS
-#define ETHTOOL_GSTATS 0x1d
-#undef ethtool_drvinfo
-#define ethtool_drvinfo k_ethtool_drvinfo
-struct k_ethtool_drvinfo {
- u32 cmd;
- char driver[32];
- char version[32];
- char fw_version[32];
- char bus_info[32];
- char reserved1[32];
- char reserved2[16];
- u32 n_stats;
- u32 testinfo_len;
- u32 eedump_len;
- u32 regdump_len;
-};
-
-struct ethtool_stats {
- u32 cmd;
- u32 n_stats;
- u64 data[0];
-};
-#endif /* ETHTOOL_GSTATS */
-
-#ifndef ETHTOOL_PHYS_ID
-#define ETHTOOL_PHYS_ID 0x1c
-#endif /* ETHTOOL_PHYS_ID */
-
-#ifndef ETHTOOL_GSTRINGS
-#define ETHTOOL_GSTRINGS 0x1b
-enum ethtool_stringset {
- ETH_SS_TEST = 0,
- ETH_SS_STATS,
-};
-struct ethtool_gstrings {
- u32 cmd; /* ETHTOOL_GSTRINGS */
- u32 string_set; /* string set id, e.g. ETH_SS_TEST, etc. */
- u32 len; /* number of strings in the string set */
- u8 data[0];
-};
-#endif /* ETHTOOL_GSTRINGS */
-
-#ifndef ETHTOOL_TEST
-#define ETHTOOL_TEST 0x1a
-enum ethtool_test_flags {
- ETH_TEST_FL_OFFLINE = (1 << 0),
- ETH_TEST_FL_FAILED = (1 << 1),
-};
-struct ethtool_test {
- u32 cmd;
- u32 flags;
- u32 reserved;
- u32 len;
- u64 data[0];
-};
-#endif /* ETHTOOL_TEST */
-
-#ifndef ETHTOOL_GEEPROM
-#define ETHTOOL_GEEPROM 0xb
-#undef ETHTOOL_GREGS
-struct ethtool_eeprom {
- u32 cmd;
- u32 magic;
- u32 offset;
- u32 len;
- u8 data[0];
-};
-
-struct ethtool_value {
- u32 cmd;
- u32 data;
-};
-#endif /* ETHTOOL_GEEPROM */
-
-#ifndef ETHTOOL_GLINK
-#define ETHTOOL_GLINK 0xa
-#endif /* ETHTOOL_GLINK */
-
-#ifndef ETHTOOL_GWOL
-#define ETHTOOL_GWOL 0x5
-#define ETHTOOL_SWOL 0x6
-#define SOPASS_MAX 6
-struct ethtool_wolinfo {
- u32 cmd;
- u32 supported;
- u32 wolopts;
- u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
-};
-#endif /* ETHTOOL_GWOL */
-
-#ifndef ETHTOOL_GREGS
-#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */
-#define ethtool_regs _kc_ethtool_regs
-/* for passing big chunks of data */
-struct _kc_ethtool_regs {
- u32 cmd;
- u32 version; /* driver-specific, indicates different chips/revs */
- u32 len; /* bytes */
- u8 data[0];
-};
-#endif /* ETHTOOL_GREGS */
-
-#ifndef ETHTOOL_GMSGLVL
-#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
-#endif
-#ifndef ETHTOOL_SMSGLVL
-#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
-#endif
-#ifndef ETHTOOL_NWAY_RST
-#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */
-#endif
-#ifndef ETHTOOL_GLINK
-#define ETHTOOL_GLINK 0x0000000a /* Get link status */
-#endif
-#ifndef ETHTOOL_GEEPROM
-#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
-#endif
-#ifndef ETHTOOL_SEEPROM
-#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */
-#endif
-#ifndef ETHTOOL_GCOALESCE
-#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
-/* for configuring coalescing parameters of chip */
-#define ethtool_coalesce _kc_ethtool_coalesce
-struct _kc_ethtool_coalesce {
- u32 cmd; /* ETHTOOL_{G,S}COALESCE */
-
- /* How many usecs to delay an RX interrupt after
- * a packet arrives. If 0, only rx_max_coalesced_frames
- * is used.
- */
- u32 rx_coalesce_usecs;
-
- /* How many packets to delay an RX interrupt after
- * a packet arrives. If 0, only rx_coalesce_usecs is
- * used. It is illegal to set both usecs and max frames
- * to zero as this would cause RX interrupts to never be
- * generated.
- */
- u32 rx_max_coalesced_frames;
-
- /* Same as above two parameters, except that these values
- * apply while an IRQ is being serviced by the host. Not
- * all cards support this feature and the values are ignored
- * in that case.
- */
- u32 rx_coalesce_usecs_irq;
- u32 rx_max_coalesced_frames_irq;
-
- /* How many usecs to delay a TX interrupt after
- * a packet is sent. If 0, only tx_max_coalesced_frames
- * is used.
- */
- u32 tx_coalesce_usecs;
-
- /* How many packets to delay a TX interrupt after
- * a packet is sent. If 0, only tx_coalesce_usecs is
- * used. It is illegal to set both usecs and max frames
- * to zero as this would cause TX interrupts to never be
- * generated.
- */
- u32 tx_max_coalesced_frames;
-
- /* Same as above two parameters, except that these values
- * apply while an IRQ is being serviced by the host. Not
- * all cards support this feature and the values are ignored
- * in that case.
- */
- u32 tx_coalesce_usecs_irq;
- u32 tx_max_coalesced_frames_irq;
-
- /* How many usecs to delay in-memory statistics
- * block updates. Some drivers do not have an in-memory
- * statistic block, and in such cases this value is ignored.
- * This value must not be zero.
- */
- u32 stats_block_coalesce_usecs;
-
- /* Adaptive RX/TX coalescing is an algorithm implemented by
- * some drivers to improve latency under low packet rates and
- * improve throughput under high packet rates. Some drivers
- * only implement one of RX or TX adaptive coalescing. Anything
- * not implemented by the driver causes these values to be
- * silently ignored.
- */
- u32 use_adaptive_rx_coalesce;
- u32 use_adaptive_tx_coalesce;
-
- /* When the packet rate (measured in packets per second)
- * is below pkt_rate_low, the {rx,tx}_*_low parameters are
- * used.
- */
- u32 pkt_rate_low;
- u32 rx_coalesce_usecs_low;
- u32 rx_max_coalesced_frames_low;
- u32 tx_coalesce_usecs_low;
- u32 tx_max_coalesced_frames_low;
-
- /* When the packet rate is below pkt_rate_high but above
- * pkt_rate_low (both measured in packets per second) the
- * normal {rx,tx}_* coalescing parameters are used.
- */
-
- /* When the packet rate (measured in packets per second)
- * is above pkt_rate_high, the {rx,tx}_*_high parameters are
- * used.
- */
- u32 pkt_rate_high;
- u32 rx_coalesce_usecs_high;
- u32 rx_max_coalesced_frames_high;
- u32 tx_coalesce_usecs_high;
- u32 tx_max_coalesced_frames_high;
-
- /* How often to do adaptive coalescing packet rate sampling,
- * measured in seconds. Must not be zero.
- */
- u32 rate_sample_interval;
-};
-#endif /* ETHTOOL_GCOALESCE */
-
-#ifndef ETHTOOL_SCOALESCE
-#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
-#endif
-#ifndef ETHTOOL_GRINGPARAM
-#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
-/* for configuring RX/TX ring parameters */
-#define ethtool_ringparam _kc_ethtool_ringparam
-struct _kc_ethtool_ringparam {
- u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
-
- /* Read only attributes. These indicate the maximum number
- * of pending RX/TX ring entries the driver will allow the
- * user to set.
- */
- u32 rx_max_pending;
- u32 rx_mini_max_pending;
- u32 rx_jumbo_max_pending;
- u32 tx_max_pending;
-
- /* Values changeable by the user. The valid values are
- * in the range 1 to the "*_max_pending" counterpart above.
- */
- u32 rx_pending;
- u32 rx_mini_pending;
- u32 rx_jumbo_pending;
- u32 tx_pending;
-};
-#endif /* ETHTOOL_GRINGPARAM */
-
-#ifndef ETHTOOL_SRINGPARAM
-#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
-#endif
-#ifndef ETHTOOL_GPAUSEPARAM
-#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
-/* for configuring link flow control parameters */
-#define ethtool_pauseparam _kc_ethtool_pauseparam
-struct _kc_ethtool_pauseparam {
- u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
-
- /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
- * being true) the user may set 'autoneg' here non-zero to have the
- * pause parameters be auto-negotiated too. In such a case, the
- * {rx,tx}_pause values below determine what capabilities are
- * advertised.
- *
- * If 'autoneg' is zero or the link is not being auto-negotiated,
- * then {rx,tx}_pause force the driver to use/not-use pause
- * flow control.
- */
- u32 autoneg;
- u32 rx_pause;
- u32 tx_pause;
-};
-#endif /* ETHTOOL_GPAUSEPARAM */
-
-#ifndef ETHTOOL_SPAUSEPARAM
-#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
-#endif
-#ifndef ETHTOOL_GRXCSUM
-#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_SRXCSUM
-#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_GTXCSUM
-#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_STXCSUM
-#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_GSG
-#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
- * (ethtool_value) */
-#endif
-#ifndef ETHTOOL_SSG
-#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
- * (ethtool_value). */
-#endif
-#ifndef ETHTOOL_TEST
-#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
-#endif
-#ifndef ETHTOOL_GSTRINGS
-#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
-#endif
-#ifndef ETHTOOL_PHYS_ID
-#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
-#endif
-#ifndef ETHTOOL_GSTATS
-#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
-#endif
-#ifndef ETHTOOL_GTSO
-#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
-#endif
-#ifndef ETHTOOL_STSO
-#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
-#endif
-
-#ifndef ETHTOOL_BUSINFO_LEN
-#define ETHTOOL_BUSINFO_LEN 32
-#endif
-
-#ifndef RHEL_RELEASE_CODE
-/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
-#define RHEL_RELEASE_CODE 0
-#endif
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
-#endif
-#ifndef AX_RELEASE_CODE
-#define AX_RELEASE_CODE 0
-#endif
-#ifndef AX_RELEASE_VERSION
-#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
-#endif
-
-/* SuSE version macro is the same as Linux kernel version */
-#ifndef SLE_VERSION
-#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
-#endif
-#ifndef SLE_VERSION_CODE
-#ifdef CONFIG_SUSE_KERNEL
-/* SLES11 GA is 2.6.27 based */
-#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
-#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
-#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
-/* SLES11 SP1 is 2.6.32 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
-#else
-#define SLE_VERSION_CODE 0
-#endif
-#else /* CONFIG_SUSE_KERNEL */
-#define SLE_VERSION_CODE 0
-#endif /* CONFIG_SUSE_KERNEL */
-#endif /* SLE_VERSION_CODE */
-
-#ifdef __KLOCWORK__
-#ifdef ARRAY_SIZE
-#undef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-#endif /* __KLOCWORK__ */
-
-/*****************************************************************************/
-/* 2.4.3 => 2.4.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-#ifndef pci_set_dma_mask
-#define pci_set_dma_mask _kc_pci_set_dma_mask
-extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
-#endif
-
-#ifndef pci_request_regions
-#define pci_request_regions _kc_pci_request_regions
-extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
-#endif
-
-#ifndef pci_release_regions
-#define pci_release_regions _kc_pci_release_regions
-extern void _kc_pci_release_regions(struct pci_dev *pdev);
-#endif
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-#ifndef alloc_etherdev
-#define alloc_etherdev _kc_alloc_etherdev
-extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
-#endif
-
-#ifndef is_valid_ether_addr
-#define is_valid_ether_addr _kc_is_valid_ether_addr
-extern int _kc_is_valid_ether_addr(u8 *addr);
-#endif
-
-/**************************************/
-/* MISCELLANEOUS */
-
-#ifndef INIT_TQUEUE
-#define INIT_TQUEUE(_tq, _routine, _data) \
- do { \
- INIT_LIST_HEAD(&(_tq)->list); \
- (_tq)->sync = 0; \
- (_tq)->routine = _routine; \
- (_tq)->data = _data; \
- } while (0)
-#endif
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
-/* Generic MII registers. */
-#define MII_BMCR 0x00 /* Basic mode control register */
-#define MII_BMSR 0x01 /* Basic mode status register */
-#define MII_PHYSID1 0x02 /* PHYS ID 1 */
-#define MII_PHYSID2 0x03 /* PHYS ID 2 */
-#define MII_ADVERTISE 0x04 /* Advertisement control reg */
-#define MII_LPA 0x05 /* Link partner ability reg */
-#define MII_EXPANSION 0x06 /* Expansion register */
-/* Basic mode control register. */
-#define BMCR_FULLDPLX 0x0100 /* Full duplex */
-#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
-/* Basic mode status register. */
-#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
-#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
-#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
-#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
-#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
-#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
-/* Advertisement control register. */
-#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
-#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
-#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
-#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
-#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
-#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
- ADVERTISE_100HALF | ADVERTISE_100FULL)
-/* Expansion register for auto-negotiation. */
-#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
-#endif
-
-/*****************************************************************************/
-/* 2.4.6 => 2.4.3 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-#ifndef pci_set_power_state
-#define pci_set_power_state _kc_pci_set_power_state
-extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
-#endif
-
-#ifndef pci_enable_wake
-#define pci_enable_wake _kc_pci_enable_wake
-extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
-#endif
-
-#ifndef pci_disable_device
-#define pci_disable_device _kc_pci_disable_device
-extern void _kc_pci_disable_device(struct pci_dev *pdev);
-#endif
-
-/* PCI PM entry point syntax changed, so don't support suspend/resume */
-#undef CONFIG_PM
-
-#endif /* 2.4.6 => 2.4.3 */
-
-#ifndef HAVE_PCI_SET_MWI
-#define pci_set_mwi(X) pci_write_config_word(X, \
- PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
- PCI_COMMAND_INVALIDATE);
-#define pci_clear_mwi(X) pci_write_config_word(X, \
- PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
- ~PCI_COMMAND_INVALIDATE);
-#endif
-
-/*****************************************************************************/
-/* 2.4.10 => 2.4.9 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
-
-/**************************************/
-/* MODULE API */
-
-#ifndef MODULE_LICENSE
- #define MODULE_LICENSE(X)
-#endif
-
-/**************************************/
-/* OTHER */
-
-#undef min
-#define min(x,y) ({ \
- const typeof(x) _x = (x); \
- const typeof(y) _y = (y); \
- (void) (&_x == &_y); \
- _x < _y ? _x : _y; })
-
-#undef max
-#define max(x,y) ({ \
- const typeof(x) _x = (x); \
- const typeof(y) _y = (y); \
- (void) (&_x == &_y); \
- _x > _y ? _x : _y; })
-
-#define min_t(type,x,y) ({ \
- type _x = (x); \
- type _y = (y); \
- _x < _y ? _x : _y; })
-
-#define max_t(type,x,y) ({ \
- type _x = (x); \
- type _y = (y); \
- _x > _y ? _x : _y; })
-
-#ifndef list_for_each_safe
-#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
-#endif
-
-#ifndef ____cacheline_aligned_in_smp
-#ifdef CONFIG_SMP
-#define ____cacheline_aligned_in_smp ____cacheline_aligned
-#else
-#define ____cacheline_aligned_in_smp
-#endif /* CONFIG_SMP */
-#endif
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
-#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
-extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
-#else /* 2.4.8 => 2.4.9 */
-extern int snprintf(char * buf, size_t size, const char *fmt, ...);
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-#endif
-#endif /* 2.4.10 => 2.4.9 */
-
-
-/*****************************************************************************/
-/* 2.4.12 => 2.4.10 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
-#ifndef HAVE_NETIF_MSG
-#define HAVE_NETIF_MSG 1
-enum {
- NETIF_MSG_DRV = 0x0001,
- NETIF_MSG_PROBE = 0x0002,
- NETIF_MSG_LINK = 0x0004,
- NETIF_MSG_TIMER = 0x0008,
- NETIF_MSG_IFDOWN = 0x0010,
- NETIF_MSG_IFUP = 0x0020,
- NETIF_MSG_RX_ERR = 0x0040,
- NETIF_MSG_TX_ERR = 0x0080,
- NETIF_MSG_TX_QUEUED = 0x0100,
- NETIF_MSG_INTR = 0x0200,
- NETIF_MSG_TX_DONE = 0x0400,
- NETIF_MSG_RX_STATUS = 0x0800,
- NETIF_MSG_PKTDATA = 0x1000,
- NETIF_MSG_HW = 0x2000,
- NETIF_MSG_WOL = 0x4000,
-};
-
-#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
-#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
-#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
-#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
-#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
-#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
-#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
-#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
-#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
-#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
-#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
-#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
-#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
-#endif /* !HAVE_NETIF_MSG */
-#endif /* 2.4.12 => 2.4.10 */
-
-/*****************************************************************************/
-/* 2.4.13 => 2.4.12 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#ifndef virt_to_page
- #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
-#endif
-
-#ifndef pci_map_page
-#define pci_map_page _kc_pci_map_page
-extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
-#endif
-
-#ifndef pci_unmap_page
-#define pci_unmap_page _kc_pci_unmap_page
-extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
-#endif
-
-/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
-
-#undef DMA_32BIT_MASK
-#define DMA_32BIT_MASK 0xffffffff
-#undef DMA_64BIT_MASK
-#define DMA_64BIT_MASK 0xffffffff
-
-/**************************************/
-/* OTHER */
-
-#ifndef cpu_relax
-#define cpu_relax() rep_nop()
-#endif
-
-struct vlan_ethhdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- unsigned short h_vlan_proto;
- unsigned short h_vlan_TCI;
- unsigned short h_vlan_encapsulated_proto;
-};
-#endif /* 2.4.13 => 2.4.12 */
-
-/*****************************************************************************/
-/* 2.4.17 => 2.4.12 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
-
-#ifndef __devexit_p
- #define __devexit_p(x) &(x)
-#endif
-
-#endif /* 2.4.17 => 2.4.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
-#define NETIF_MSG_HW 0x2000
-#define NETIF_MSG_WOL 0x4000
-
-#ifndef netif_msg_hw
-#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW)
-#endif
-#ifndef netif_msg_wol
-#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL)
-#endif
-#endif /* 2.4.18 */
-
-/*****************************************************************************/
-
-/*****************************************************************************/
-/* 2.4.20 => 2.4.19 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
-
-/* we won't support NAPI on less than 2.4.20 */
-#ifdef NAPI
-#undef NAPI
-#undef CONFIG_IXGBE_NAPI
-#endif
-
-#endif /* 2.4.20 => 2.4.19 */
-
-/*****************************************************************************/
-/* 2.4.22 => 2.4.17 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
-#define pci_name(x) ((x)->slot_name)
-#endif
-
-/*****************************************************************************/
-/* 2.4.22 => 2.4.17 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
-#ifndef IXGBE_NO_LRO
-/* Don't enable LRO for these legacy kernels */
-#define IXGBE_NO_LRO
-#endif
-#endif
-
-/*****************************************************************************/
-/*****************************************************************************/
-/* 2.4.23 => 2.4.22 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
-/*****************************************************************************/
-#ifdef NAPI
-#ifndef netif_poll_disable
-#define netif_poll_disable(x) _kc_netif_poll_disable(x)
-static inline void _kc_netif_poll_disable(struct net_device *netdev)
-{
- while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
- /* No hurry */
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- }
-}
-#endif
-#ifndef netif_poll_enable
-#define netif_poll_enable(x) _kc_netif_poll_enable(x)
-static inline void _kc_netif_poll_enable(struct net_device *netdev)
-{
- clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
-}
-#endif
-#endif /* NAPI */
-#ifndef netif_tx_disable
-#define netif_tx_disable(x) _kc_netif_tx_disable(x)
-static inline void _kc_netif_tx_disable(struct net_device *dev)
-{
- spin_lock_bh(&dev->xmit_lock);
- netif_stop_queue(dev);
- spin_unlock_bh(&dev->xmit_lock);
-}
-#endif
-#else /* 2.4.23 => 2.4.22 */
-#define HAVE_SCTP
-#endif /* 2.4.23 => 2.4.22 */
-
-/*****************************************************************************/
-/* 2.6.4 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
- ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
-#define ETHTOOL_OPS_COMPAT
-#endif /* 2.6.4 => 2.6.0 */
-
-/*****************************************************************************/
-/* 2.5.71 => 2.4.x */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
-#define sk_protocol protocol
-#define pci_get_device pci_find_device
-#endif /* 2.5.71 => 2.4.x */
-
-/*****************************************************************************/
-/* < 2.4.27 or 2.6.0 <= 2.6.5 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
- ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
-
-#ifndef netif_msg_init
-#define netif_msg_init _kc_netif_msg_init
-static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
-{
- /* use default */
- if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
- return default_msg_enable_bits;
- if (debug_value == 0) /* no output */
- return 0;
- /* set low N bits */
- return (1 << debug_value) -1;
-}
-#endif
-
-#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
-/*****************************************************************************/
-#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
- (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
- ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
-#define netdev_priv(x) x->priv
-#endif
-
-/*****************************************************************************/
-/* <= 2.5.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
-#include <linux/rtnetlink.h>
-#undef pci_register_driver
-#define pci_register_driver pci_module_init
-
-/*
- * Most of the dma compat code is copied/modified from the 2.4.37
- * /include/linux/libata-compat.h header file
- */
-/* These definitions mirror those in pci.h, so they can be used
- * interchangeably with their PCI_ counterparts */
-enum dma_data_direction {
- DMA_BIDIRECTIONAL = 0,
- DMA_TO_DEVICE = 1,
- DMA_FROM_DEVICE = 2,
- DMA_NONE = 3,
-};
-
-struct device {
- struct pci_dev pdev;
-};
-
-static inline struct pci_dev *to_pci_dev (struct device *dev)
-{
- return (struct pci_dev *) dev;
-}
-static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
-{
- return (struct device *) pdev;
-}
-
-#define pdev_printk(lvl, pdev, fmt, args...) \
- printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
-#define dev_err(dev, fmt, args...) \
- pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
-#define dev_info(dev, fmt, args...) \
- pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
-#define dev_warn(dev, fmt, args...) \
- pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
-
-/* NOTE: dangerous! we ignore the 'gfp' argument */
-#define dma_alloc_coherent(dev,sz,dma,gfp) \
- pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
-#define dma_free_coherent(dev,sz,addr,dma_addr) \
- pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
-
-#define dma_map_page(dev,a,b,c,d) \
- pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
-#define dma_unmap_page(dev,a,b,c) \
- pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
-
-#define dma_map_single(dev,a,b,c) \
- pci_map_single(to_pci_dev(dev),(a),(b),(c))
-#define dma_unmap_single(dev,a,b,c) \
- pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
-
-#define dma_sync_single(dev,a,b,c) \
- pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
-
-/* for range just sync everything, that's all the pci API can do */
-#define dma_sync_single_range(dev,addr,off,sz,dir) \
- pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
-
-#define dma_set_mask(dev,mask) \
- pci_set_dma_mask(to_pci_dev(dev),(mask))
-
-/* hlist_* code - double linked lists */
-struct hlist_head {
- struct hlist_node *first;
-};
-
-struct hlist_node {
- struct hlist_node *next, **pprev;
-};
-
-static inline void __hlist_del(struct hlist_node *n)
-{
- struct hlist_node *next = n->next;
- struct hlist_node **pprev = n->pprev;
- *pprev = next;
- if (next)
- next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
- __hlist_del(n);
- n->next = NULL;
- n->pprev = NULL;
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
- struct hlist_node *first = h->first;
- n->next = first;
- if (first)
- first->pprev = &n->next;
- h->first = n;
- n->pprev = &h->first;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
- return !h->first;
-}
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
- h->next = NULL;
- h->pprev = NULL;
-}
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each_entry(tpos, pos, head, member) \
- for (pos = (head)->first; \
- pos && ({ prefetch(pos->next); 1;}) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = pos->next)
-
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
- for (pos = (head)->first; \
- pos && ({ n = pos->next; 1; }) && \
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
- pos = n)
-
-#ifndef might_sleep
-#define might_sleep()
-#endif
-#else
-static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
-{
- return &pdev->dev;
-}
-#endif /* <= 2.5.0 */
-
-/*****************************************************************************/
-/* 2.5.28 => 2.4.23 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
-
-static inline void _kc_synchronize_irq(void)
-{
- synchronize_irq();
-}
-#undef synchronize_irq
-#define synchronize_irq(X) _kc_synchronize_irq()
-
-#include <linux/tqueue.h>
-#define work_struct tq_struct
-#undef INIT_WORK
-#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
-#undef container_of
-#define container_of list_entry
-#define schedule_work schedule_task
-#define flush_scheduled_work flush_scheduled_tasks
-#define cancel_work_sync(x) flush_scheduled_work()
-
-#endif /* 2.5.28 => 2.4.23 */
-
-/*****************************************************************************/
-/* 2.6.0 => 2.5.28 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-#undef get_cpu
-#define get_cpu() smp_processor_id()
-#undef put_cpu
-#define put_cpu() do { } while(0)
-#define MODULE_INFO(version, _version)
-#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
-#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
-#endif
-#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
-
-#define dma_set_coherent_mask(dev,mask) 1
-
-#undef dev_put
-#define dev_put(dev) __dev_put(dev)
-
-#ifndef skb_fill_page_desc
-#define skb_fill_page_desc _kc_skb_fill_page_desc
-extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
-#endif
-
-#undef ALIGN
-#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
-
-#ifndef page_count
-#define page_count(p) atomic_read(&(p)->count)
-#endif
-
-#ifdef MAX_NUMNODES
-#undef MAX_NUMNODES
-#endif
-#define MAX_NUMNODES 1
-
-/* find_first_bit and find_next_bit are not defined for most
- * 2.4 kernels (except for the redhat 2.4.21 kernels).
- */
-#include <linux/bitops.h>
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
-#undef find_next_bit
-#define find_next_bit _kc_find_next_bit
-extern unsigned long _kc_find_next_bit(const unsigned long *addr,
- unsigned long size,
- unsigned long offset);
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-
-
-#ifndef netdev_name
-static inline const char *_kc_netdev_name(const struct net_device *dev)
-{
- if (strchr(dev->name, '%'))
- return "(unregistered net_device)";
- return dev->name;
-}
-#define netdev_name(netdev) _kc_netdev_name(netdev)
-#endif /* netdev_name */
-
-#ifndef strlcpy
-#define strlcpy _kc_strlcpy
-extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
-#endif /* strlcpy */
-
-#endif /* 2.6.0 => 2.5.28 */
-
-/*****************************************************************************/
-/* 2.6.4 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
-#endif /* 2.6.4 => 2.6.0 */
-
-/*****************************************************************************/
-/* 2.6.5 => 2.6.0 */
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
-#define dma_sync_single_for_cpu dma_sync_single
-#define dma_sync_single_for_device dma_sync_single
-#define dma_sync_single_range_for_cpu dma_sync_single_range
-#define dma_sync_single_range_for_device dma_sync_single_range
-#ifndef pci_dma_mapping_error
-#define pci_dma_mapping_error _kc_pci_dma_mapping_error
-static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
-{
- return dma_addr == 0;
-}
-#endif
-#endif /* 2.6.5 => 2.6.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
-#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
-/* taken from 2.6 include/linux/bitmap.h */
-#undef bitmap_zero
-#define bitmap_zero _kc_bitmap_zero
-static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
-{
- if (nbits <= BITS_PER_LONG)
- *dst = 0UL;
- else {
- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
- memset(dst, 0, len);
- }
-}
-#define random_ether_addr _kc_random_ether_addr
-static inline void _kc_random_ether_addr(u8 *addr)
-{
- get_random_bytes(addr, ETH_ALEN);
- addr[0] &= 0xfe; /* clear multicast */
- addr[0] |= 0x02; /* set local assignment */
-}
-#define page_to_nid(x) 0
-
-#endif /* < 2.6.6 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
-#undef if_mii
-#define if_mii _kc_if_mii
-static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
-{
- return (struct mii_ioctl_data *) &rq->ifr_ifru;
-}
-
-#ifndef __force
-#define __force
-#endif
-#endif /* < 2.6.7 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
-#ifndef PCI_EXP_DEVCTL
-#define PCI_EXP_DEVCTL 8
-#endif
-#ifndef PCI_EXP_DEVCTL_CERE
-#define PCI_EXP_DEVCTL_CERE 0x0001
-#endif
-#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \
- schedule_timeout((x * HZ)/1000 + 2); \
- } while (0)
-
-#endif /* < 2.6.8 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
-#include <net/dsfield.h>
-#define __iomem
-
-#ifndef kcalloc
-#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
-extern void *_kc_kzalloc(size_t size, int flags);
-#endif
-#define MSEC_PER_SEC 1000L
-static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
-{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
-#else
- return (j * MSEC_PER_SEC) / HZ;
-#endif
-}
-static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
-{
- if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return m * (HZ / MSEC_PER_SEC);
-#else
- return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
-#endif
-}
-
-#define msleep_interruptible _kc_msleep_interruptible
-static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
-{
- unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
-
- while (timeout && !signal_pending(current)) {
- __set_current_state(TASK_INTERRUPTIBLE);
- timeout = schedule_timeout(timeout);
- }
- return _kc_jiffies_to_msecs(timeout);
-}
-
-/* Basic mode control register. */
-#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */
-
-#ifndef __le16
-#define __le16 u16
-#endif
-#ifndef __le32
-#define __le32 u32
-#endif
-#ifndef __le64
-#define __le64 u64
-#endif
-#ifndef __be16
-#define __be16 u16
-#endif
-#ifndef __be32
-#define __be32 u32
-#endif
-#ifndef __be64
-#define __be64 u64
-#endif
-
-static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
-{
- return (struct vlan_ethhdr *)skb->mac.raw;
-}
-
-/* Wake-On-Lan options. */
-#define WAKE_PHY (1 << 0)
-#define WAKE_UCAST (1 << 1)
-#define WAKE_MCAST (1 << 2)
-#define WAKE_BCAST (1 << 3)
-#define WAKE_ARP (1 << 4)
-#define WAKE_MAGIC (1 << 5)
-#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
-
-#define skb_header_pointer _kc_skb_header_pointer
-static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
- int offset, int len, void *buffer)
-{
- int hlen = skb_headlen(skb);
-
- if (hlen - offset >= len)
- return skb->data + offset;
-
-#ifdef MAX_SKB_FRAGS
- if (skb_copy_bits(skb, offset, buffer, len) < 0)
- return NULL;
-
- return buffer;
-#else
- return NULL;
-#endif
-}
-
-#ifndef NETDEV_TX_OK
-#define NETDEV_TX_OK 0
-#endif
-#ifndef NETDEV_TX_BUSY
-#define NETDEV_TX_BUSY 1
-#endif
-#ifndef NETDEV_TX_LOCKED
-#define NETDEV_TX_LOCKED -1
-#endif
-
-#ifndef __bitwise
-#define __bitwise
-#endif
-#endif /* < 2.6.9 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-#ifdef module_param_array_named
-#undef module_param_array_named
-#define module_param_array_named(name, array, type, nump, perm) \
- static struct kparam_array __param_arr_##name \
- = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
- sizeof(array[0]), array }; \
- module_param_call(name, param_array_set, param_array_get, \
- &__param_arr_##name, perm)
-#endif /* module_param_array_named */
-/*
- * num_online is broken for all < 2.6.10 kernels. This is needed to support
- * Node module parameter of ixgbe.
- */
-#undef num_online_nodes
-#define num_online_nodes(n) 1
-extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
-#undef node_online_map
-#define node_online_map _kcompat_node_online_map
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
-#define PCI_D0 0
-#define PCI_D1 1
-#define PCI_D2 2
-#define PCI_D3hot 3
-#define PCI_D3cold 4
-typedef int pci_power_t;
-#define pci_choose_state(pdev,state) state
-#define PMSG_SUSPEND 3
-#define PCI_EXP_LNKCTL 16
-
-#undef NETIF_F_LLTX
-
-#ifndef ARCH_HAS_PREFETCH
-#define prefetch(X)
-#endif
-
-#ifndef NET_IP_ALIGN
-#define NET_IP_ALIGN 2
-#endif
-
-#define KC_USEC_PER_SEC 1000000L
-#define usecs_to_jiffies _kc_usecs_to_jiffies
-static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
-{
-#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
- return (KC_USEC_PER_SEC / HZ) * j;
-#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
- return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
-#else
- return (j * KC_USEC_PER_SEC) / HZ;
-#endif
-}
-static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
-{
- if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
- return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
-#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
- return m * (HZ / KC_USEC_PER_SEC);
-#else
- return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
-#endif
-}
-#endif /* < 2.6.11 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
-#include <linux/reboot.h>
-#define USE_REBOOT_NOTIFIER
-
-/* Generic MII registers. */
-#define MII_CTRL1000 0x09 /* 1000BASE-T control */
-#define MII_STAT1000 0x0a /* 1000BASE-T status */
-/* Advertisement control register. */
-#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */
-#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */
-/* 1000BASE-T Control register */
-#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */
-#ifndef is_zero_ether_addr
-#define is_zero_ether_addr _kc_is_zero_ether_addr
-static inline int _kc_is_zero_ether_addr(const u8 *addr)
-{
- return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
-}
-#endif /* is_zero_ether_addr */
-#ifndef is_multicast_ether_addr
-#define is_multicast_ether_addr _kc_is_multicast_ether_addr
-static inline int _kc_is_multicast_ether_addr(const u8 *addr)
-{
- return addr[0] & 0x01;
-}
-#endif /* is_multicast_ether_addr */
-#endif /* < 2.6.12 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-#ifndef kstrdup
-#define kstrdup _kc_kstrdup
-extern char *_kc_kstrdup(const char *s, unsigned int gfp);
-#endif
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-#define pm_message_t u32
-#ifndef kzalloc
-#define kzalloc _kc_kzalloc
-extern void *_kc_kzalloc(size_t size, int flags);
-#endif
-
-/* Generic MII registers. */
-#define MII_ESTATUS 0x0f /* Extended Status */
-/* Basic mode status register. */
-#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */
-/* Extended status register. */
-#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */
-#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */
-
-#define ADVERTISED_Pause (1 << 13)
-#define ADVERTISED_Asym_Pause (1 << 14)
-
-#if (!(RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
-#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
-#define gfp_t unsigned
-#else
-typedef unsigned gfp_t;
-#endif
-#endif /* !RHEL4.3->RHEL5.0 */
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
-#ifdef CONFIG_X86_64
-#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \
- dma_sync_single_for_cpu(dev, dma_handle, size, dir)
-#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \
- dma_sync_single_for_device(dev, dma_handle, size, dir)
-#endif
-#endif
-#endif /* < 2.6.14 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
-#ifndef vmalloc_node
-#define vmalloc_node(a,b) vmalloc(a)
-#endif /* vmalloc_node*/
-
-#define setup_timer(_timer, _function, _data) \
-do { \
- (_timer)->function = _function; \
- (_timer)->data = _data; \
- init_timer(_timer); \
-} while (0)
-#ifndef device_can_wakeup
-#define device_can_wakeup(dev) (1)
-#endif
-#ifndef device_set_wakeup_enable
-#define device_set_wakeup_enable(dev, val) do{}while(0)
-#endif
-#ifndef device_init_wakeup
-#define device_init_wakeup(dev,val) do {} while (0)
-#endif
-static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
-{
- const u16 *a = (const u16 *) addr1;
- const u16 *b = (const u16 *) addr2;
-
- return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
-}
-#undef compare_ether_addr
-#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
-#endif /* < 2.6.15 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
-#undef DEFINE_MUTEX
-#define DEFINE_MUTEX(x) DECLARE_MUTEX(x)
-#define mutex_lock(x) down_interruptible(x)
-#define mutex_unlock(x) up(x)
-
-#ifndef ____cacheline_internodealigned_in_smp
-#ifdef CONFIG_SMP
-#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
-#else
-#define ____cacheline_internodealigned_in_smp
-#endif /* CONFIG_SMP */
-#endif /* ____cacheline_internodealigned_in_smp */
-#undef HAVE_PCI_ERS
-#else /* 2.6.16 and above */
-#undef HAVE_PCI_ERS
-#define HAVE_PCI_ERS
-#endif /* < 2.6.16 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
-#ifndef first_online_node
-#define first_online_node 0
-#endif
-#ifndef NET_SKB_PAD
-#define NET_SKB_PAD 16
-#endif
-#endif /* < 2.6.17 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
-
-#ifndef IRQ_HANDLED
-#define irqreturn_t void
-#define IRQ_HANDLED
-#define IRQ_NONE
-#endif
-
-#ifndef IRQF_PROBE_SHARED
-#ifdef SA_PROBEIRQ
-#define IRQF_PROBE_SHARED SA_PROBEIRQ
-#else
-#define IRQF_PROBE_SHARED 0
-#endif
-#endif
-
-#ifndef IRQF_SHARED
-#define IRQF_SHARED SA_SHIRQ
-#endif
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-#ifndef FIELD_SIZEOF
-#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
-#endif
-
-#ifndef skb_is_gso
-#ifdef NETIF_F_TSO
-#define skb_is_gso _kc_skb_is_gso
-static inline int _kc_skb_is_gso(const struct sk_buff *skb)
-{
- return skb_shinfo(skb)->gso_size;
-}
-#else
-#define skb_is_gso(a) 0
-#endif
-#endif
-
-#ifndef resource_size_t
-#define resource_size_t unsigned long
-#endif
-
-#ifdef skb_pad
-#undef skb_pad
-#endif
-#define skb_pad(x,y) _kc_skb_pad(x, y)
-int _kc_skb_pad(struct sk_buff *skb, int pad);
-#ifdef skb_padto
-#undef skb_padto
-#endif
-#define skb_padto(x,y) _kc_skb_padto(x, y)
-static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
-{
- unsigned int size = skb->len;
- if(likely(size >= len))
- return 0;
- return _kc_skb_pad(skb, len - size);
-}
-
-#ifndef DECLARE_PCI_UNMAP_ADDR
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
- dma_addr_t ADDR_NAME
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
- u32 LEN_NAME
-#define pci_unmap_addr(PTR, ADDR_NAME) \
- ((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
- (((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME) \
- ((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
- (((PTR)->LEN_NAME) = (VAL))
-#endif /* DECLARE_PCI_UNMAP_ADDR */
-#endif /* < 2.6.18 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-
-#ifndef DIV_ROUND_UP
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-#endif
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
-#if (!((RHEL_RELEASE_CODE && \
- ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
- RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
- (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \
- (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0))))
-typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
-#endif
-#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-#undef CONFIG_INET_LRO
-#undef CONFIG_INET_LRO_MODULE
-#undef CONFIG_FCOE
-#undef CONFIG_FCOE_MODULE
-#endif
-typedef irqreturn_t (*new_handler_t)(int, void*);
-static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
-#else /* 2.4.x */
-typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
-typedef void (*new_handler_t)(int, void*);
-static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
-#endif /* >= 2.5.x */
-{
- irq_handler_t new_handler = (irq_handler_t) handler;
- return request_irq(irq, new_handler, flags, devname, dev_id);
-}
-
-#undef request_irq
-#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
-
-#define irq_handler_t new_handler_t
-/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-#define PCIE_CONFIG_SPACE_LEN 256
-#define PCI_CONFIG_SPACE_LEN 64
-#define PCIE_LINK_STATUS 0x12
-#define pci_config_space_ich8lan() do {} while(0)
-#undef pci_save_state
-extern int _kc_pci_save_state(struct pci_dev *);
-#define pci_save_state(pdev) _kc_pci_save_state(pdev)
-#undef pci_restore_state
-extern void _kc_pci_restore_state(struct pci_dev *);
-#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-#undef free_netdev
-extern void _kc_free_netdev(struct net_device *);
-#define free_netdev(netdev) _kc_free_netdev(netdev)
-#endif
-static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-{
- return 0;
-}
-#define pci_disable_pcie_error_reporting(dev) do {} while (0)
-#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
-
-extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
-#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
-#ifndef bool
-#define bool _Bool
-#define true 1
-#define false 0
-#endif
-#else /* 2.6.19 */
-#include <linux/aer.h>
-#include <linux/string.h>
-#endif /* < 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
-#undef INIT_WORK
-#define INIT_WORK(_work, _func) \
-do { \
- INIT_LIST_HEAD(&(_work)->entry); \
- (_work)->pending = 0; \
- (_work)->func = (void (*)(void *))_func; \
- (_work)->data = _work; \
- init_timer(&(_work)->timer); \
-} while (0)
-#endif
-
-#ifndef PCI_VDEVICE
-#define PCI_VDEVICE(ven, dev) \
- PCI_VENDOR_ID_##ven, (dev), \
- PCI_ANY_ID, PCI_ANY_ID, 0, 0
-#endif
-
-#ifndef round_jiffies
-#define round_jiffies(x) x
-#endif
-
-#define csum_offset csum
-
-#define HAVE_EARLY_VMALLOC_NODE
-#define dev_to_node(dev) -1
-#undef set_dev_node
-/* use b=b to silence the unused-variable compiler warning */
-#define set_dev_node(a, b) do { (b) = (b); } while(0)
-
-#if (!(RHEL_RELEASE_CODE && \
- (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
- !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
-typedef __u16 __bitwise __sum16;
-typedef __u32 __bitwise __wsum;
-#endif
-
-#if (!(RHEL_RELEASE_CODE && \
- (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
- !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
-static inline __wsum csum_unfold(__sum16 n)
-{
- return (__force __wsum)n;
-}
-#endif
-
-#else /* < 2.6.20 */
-#define HAVE_DEVICE_NUMA_NODE
-#endif /* < 2.6.20 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-#define to_net_dev(class) container_of(class, struct net_device, class_dev)
-#define NETDEV_CLASS_DEV
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
-#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
-#define vlan_group_set_device(vg, id, dev) \
- do { \
- if (vg) vg->vlan_devices[id] = dev; \
- } while (0)
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
-#define pci_channel_offline(pdev) (pdev->error_state && \
- pdev->error_state != pci_channel_io_normal)
-#define pci_request_selected_regions(pdev, bars, name) \
- pci_request_regions(pdev, name)
-#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev);
-#endif /* < 2.6.21 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-#define tcp_hdr(skb) (skb->h.th)
-#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
-#define skb_transport_offset(skb) (skb->h.raw - skb->data)
-#define skb_transport_header(skb) (skb->h.raw)
-#define ipv6_hdr(skb) (skb->nh.ipv6h)
-#define ip_hdr(skb) (skb->nh.iph)
-#define skb_network_offset(skb) (skb->nh.raw - skb->data)
-#define skb_network_header(skb) (skb->nh.raw)
-#define skb_tail_pointer(skb) skb->tail
-#define skb_reset_tail_pointer(skb) \
- do { \
- skb->tail = skb->data; \
- } while (0)
-#define skb_copy_to_linear_data(skb, from, len) \
- memcpy(skb->data, from, len)
-#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
- memcpy(skb->data + offset, from, len)
-#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
-#define pci_register_driver pci_module_init
-#define skb_mac_header(skb) skb->mac.raw
-
-#ifdef NETIF_F_MULTI_QUEUE
-#ifndef alloc_etherdev_mq
-#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
-#endif
-#endif /* NETIF_F_MULTI_QUEUE */
-
-#ifndef ETH_FCS_LEN
-#define ETH_FCS_LEN 4
-#endif
-#define cancel_work_sync(x) flush_scheduled_work()
-#ifndef udp_hdr
-#define udp_hdr _udp_hdr
-static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
-{
- return (struct udphdr *)skb_transport_header(skb);
-}
-#endif
-
-#ifdef cpu_to_be16
-#undef cpu_to_be16
-#endif
-#define cpu_to_be16(x) __constant_htons(x)
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
-enum {
- DUMP_PREFIX_NONE,
- DUMP_PREFIX_ADDRESS,
- DUMP_PREFIX_OFFSET
-};
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
-#ifndef hex_asc
-#define hex_asc(x) "0123456789abcdef"[x]
-#endif
-#include <linux/ctype.h>
-extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
- int prefix_type, int rowsize, int groupsize,
- const void *buf, size_t len, bool ascii);
-#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
- _kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
-#else /* 2.6.22 */
-#define ETH_TYPE_TRANS_SETS_DEV
-#define HAVE_NETDEV_STATS_IN_NETDEV
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
-#endif /* > 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
-#define netif_subqueue_stopped(_a, _b) 0
-#ifndef PTR_ALIGN
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
-#endif
-
-#ifndef CONFIG_PM_SLEEP
-#define CONFIG_PM_SLEEP CONFIG_PM
-#endif
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
-#define HAVE_ETHTOOL_GET_PERM_ADDR
-#endif /* 2.6.14 through 2.6.22 */
-#endif /* < 2.6.23 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifndef ETH_FLAG_LRO
-#define ETH_FLAG_LRO NETIF_F_LRO
-#endif
-
-/* if GRO is supported then the napi struct must already exist */
-#ifndef NETIF_F_GRO
-/* NAPI API changes in 2.6.24 break everything */
-struct napi_struct {
- /* used to look up the real NAPI polling routine */
- int (*poll)(struct napi_struct *, int);
- struct net_device *dev;
- int weight;
-};
-#endif
-
-#ifdef NAPI
-extern int __kc_adapter_clean(struct net_device *, int *);
-extern struct net_device *napi_to_poll_dev(struct napi_struct *napi);
-#define netif_napi_add(_netdev, _napi, _poll, _weight) \
- do { \
- struct napi_struct *__napi = (_napi); \
- struct net_device *poll_dev = napi_to_poll_dev(__napi); \
- poll_dev->poll = &(__kc_adapter_clean); \
- poll_dev->priv = (_napi); \
- poll_dev->weight = (_weight); \
- set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
- set_bit(__LINK_STATE_START, &poll_dev->state);\
- dev_hold(poll_dev); \
- __napi->poll = &(_poll); \
- __napi->weight = (_weight); \
- __napi->dev = (_netdev); \
- } while (0)
-#define netif_napi_del(_napi) \
- do { \
- struct net_device *poll_dev = napi_to_poll_dev(_napi); \
- WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
- dev_put(poll_dev); \
- memset(poll_dev, 0, sizeof(struct net_device));\
- } while (0)
-#define napi_schedule_prep(_napi) \
- (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
-#define napi_schedule(_napi) \
- do { \
- if (napi_schedule_prep(_napi)) \
- __netif_rx_schedule(napi_to_poll_dev(_napi)); \
- } while (0)
-#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
-#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
-#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
-#ifndef NETIF_F_GRO
-#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
-#else
-#define napi_complete(_napi) \
- do { \
- napi_gro_flush(_napi); \
- netif_rx_complete(napi_to_poll_dev(_napi)); \
- } while (0)
-#endif /* NETIF_F_GRO */
-#else /* NAPI */
-#define netif_napi_add(_netdev, _napi, _poll, _weight) \
- do { \
- struct napi_struct *__napi = _napi; \
- _netdev->poll = &(_poll); \
- _netdev->weight = (_weight); \
- __napi->poll = &(_poll); \
- __napi->weight = (_weight); \
- __napi->dev = (_netdev); \
- } while (0)
-#define netif_napi_del(_a) do {} while (0)
-#endif /* NAPI */
-
-#undef dev_get_by_name
-#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
-#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
-#ifndef DMA_BIT_MASK
-#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
-#endif
-
-#ifdef NETIF_F_TSO6
-#define skb_is_gso_v6 _kc_skb_is_gso_v6
-static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
-{
- return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
-}
-#endif /* NETIF_F_TSO6 */
-
-#ifndef KERN_CONT
-#define KERN_CONT ""
-#endif
-#else /* < 2.6.24 */
-#define HAVE_ETHTOOL_GET_SSET_COUNT
-#define HAVE_NETDEV_NAPI_LIST
-#endif /* < 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
-#include <linux/pm_qos_params.h>
-#else /* >= 3.2.0 */
-#include <linux/pm_qos.h>
-#endif /* else >= 3.2.0 */
-#endif /* > 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
-#define PM_QOS_CPU_DMA_LATENCY 1
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
-#include <linux/latency.h>
-#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY
-#define pm_qos_add_requirement(pm_qos_class, name, value) \
- set_acceptable_latency(name, value)
-#define pm_qos_remove_requirement(pm_qos_class, name) \
- remove_acceptable_latency(name)
-#define pm_qos_update_requirement(pm_qos_class, name, value) \
- modify_acceptable_latency(name, value)
-#else
-#define PM_QOS_DEFAULT_VALUE -1
-#define pm_qos_add_requirement(pm_qos_class, name, value)
-#define pm_qos_remove_requirement(pm_qos_class, name)
-#define pm_qos_update_requirement(pm_qos_class, name, value) { \
- if (value != PM_QOS_DEFAULT_VALUE) { \
- printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
- pci_name(adapter->pdev)); \
- } \
-}
-
-#endif /* > 2.6.18 */
-
-#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
-
-#ifndef DEFINE_PCI_DEVICE_TABLE
-#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
-#endif /* DEFINE_PCI_DEVICE_TABLE */
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-#ifndef IXGBE_PROCFS
-#define IXGBE_PROCFS
-#endif /* IXGBE_PROCFS */
-#endif /* >= 2.6.0 */
-
-
-#else /* < 2.6.25 */
-
-#ifndef IXGBE_SYSFS
-#define IXGBE_SYSFS
-#endif /* IXGBE_SYSFS */
-
-
-#endif /* < 2.6.25 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-#ifndef clamp_t
-#define clamp_t(type, val, min, max) ({ \
- type __val = (val); \
- type __min = (min); \
- type __max = (max); \
- __val = __val < __min ? __min : __val; \
- __val > __max ? __max : __val; })
-#endif /* clamp_t */
-#ifdef NETIF_F_TSO
-#ifdef NETIF_F_TSO6
-#define netif_set_gso_max_size(_netdev, size) \
- do { \
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \
- _netdev->features &= ~NETIF_F_TSO; \
- _netdev->features &= ~NETIF_F_TSO6; \
- } else { \
- _netdev->features |= NETIF_F_TSO; \
- _netdev->features |= NETIF_F_TSO6; \
- } \
- } while (0)
-#else /* NETIF_F_TSO6 */
-#define netif_set_gso_max_size(_netdev, size) \
- do { \
- if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
- _netdev->features &= ~NETIF_F_TSO; \
- else \
- _netdev->features |= NETIF_F_TSO; \
- } while (0)
-#endif /* NETIF_F_TSO6 */
-#else
-#define netif_set_gso_max_size(_netdev, size) do {} while (0)
-#endif /* NETIF_F_TSO */
-#undef kzalloc_node
-#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
-
-extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
-#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
-#else /* < 2.6.26 */
-#include <linux/pci-aspm.h>
-#define HAVE_NETDEV_VLAN_FEATURES
-#endif /* < 2.6.26 */
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
- __u32 speed)
-{
- ep->speed = (__u16)speed;
- /* ep->speed_hi = (__u16)(speed >> 16); */
-}
-#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
-
-static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
-{
- /* no speed_hi before 2.6.27, and probably no need for it yet */
- return (__u32)ep->speed;
-}
-#define ethtool_cmd_speed _kc_ethtool_cmd_speed
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
-#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
-#define ANCIENT_PM 1
-#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
- (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
- defined(CONFIG_PM_SLEEP))
-#define NEWER_PM 1
-#endif
-#if defined(ANCIENT_PM) || defined(NEWER_PM)
-#undef device_set_wakeup_enable
-#define device_set_wakeup_enable(dev, val) \
- do { \
- u16 pmc = 0; \
- int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
- if (pm) { \
- pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
- &pmc); \
- } \
- (dev)->power.can_wakeup = !!(pmc >> 11); \
- (dev)->power.should_wakeup = (val && (pmc >> 11)); \
- } while (0)
-#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
-#endif /* 2.6.15 through 2.6.27 */
-#ifndef netif_napi_del
-#define netif_napi_del(_a) do {} while (0)
-#ifdef NAPI
-#ifdef CONFIG_NETPOLL
-#undef netif_napi_del
-#define netif_napi_del(_a) list_del(&(_a)->dev_list);
-#endif
-#endif
-#endif /* netif_napi_del */
-#ifdef dma_mapping_error
-#undef dma_mapping_error
-#endif
-#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
-
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
-#define HAVE_TX_MQ
-#endif
-
-#ifdef HAVE_TX_MQ
-extern void _kc_netif_tx_stop_all_queues(struct net_device *);
-extern void _kc_netif_tx_wake_all_queues(struct net_device *);
-extern void _kc_netif_tx_start_all_queues(struct net_device *);
-#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
-#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
-#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
-#undef netif_stop_subqueue
-#define netif_stop_subqueue(_ndev,_qi) do { \
- if (netif_is_multiqueue((_ndev))) \
- netif_stop_subqueue((_ndev), (_qi)); \
- else \
- netif_stop_queue((_ndev)); \
- } while (0)
-#undef netif_start_subqueue
-#define netif_start_subqueue(_ndev,_qi) do { \
- if (netif_is_multiqueue((_ndev))) \
- netif_start_subqueue((_ndev), (_qi)); \
- else \
- netif_start_queue((_ndev)); \
- } while (0)
-#else /* HAVE_TX_MQ */
-#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
-#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
-#define netif_tx_start_all_queues(a) netif_start_queue(a)
-#else
-#define netif_tx_start_all_queues(a) do {} while (0)
-#endif
-#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
-#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
-#endif /* HAVE_TX_MQ */
-#ifndef NETIF_F_MULTI_QUEUE
-#define NETIF_F_MULTI_QUEUE 0
-#define netif_is_multiqueue(a) 0
-#define netif_wake_subqueue(a, b)
-#endif /* NETIF_F_MULTI_QUEUE */
-
-#ifndef __WARN_printf
-extern void __kc_warn_slowpath(const char *file, const int line,
- const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
-#endif /* __WARN_printf */
-
-#ifndef WARN
-#define WARN(condition, format...) ({ \
- int __ret_warn_on = !!(condition); \
- if (unlikely(__ret_warn_on)) \
- __WARN_printf(format); \
- unlikely(__ret_warn_on); \
-})
-#endif /* WARN */
-#else /* < 2.6.27 */
-#define HAVE_TX_MQ
-#define HAVE_NETDEV_SELECT_QUEUE
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \
- pci_resource_len(pdev, bar))
-#define pci_wake_from_d3 _kc_pci_wake_from_d3
-#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
-extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
-extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
-#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
-#ifndef __skb_queue_head_init
-static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
-{
- list->prev = list->next = (struct sk_buff *)list;
- list->qlen = 0;
-}
-#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
-#endif
-#endif /* < 2.6.28 */
-
-#ifndef skb_add_rx_frag
-#define skb_add_rx_frag _kc_skb_add_rx_frag
-extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int);
-#endif
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
-#ifndef swap
-#define swap(a, b) \
- do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-#endif
-#define pci_request_selected_regions_exclusive(pdev, bars, name) \
- pci_request_selected_regions(pdev, bars, name)
-#ifndef CONFIG_NR_CPUS
-#define CONFIG_NR_CPUS 1
-#endif /* CONFIG_NR_CPUS */
-#ifndef pcie_aspm_enabled
-#define pcie_aspm_enabled() (1)
-#endif /* pcie_aspm_enabled */
-#else /* < 2.6.29 */
-#ifndef HAVE_NET_DEVICE_OPS
-#define HAVE_NET_DEVICE_OPS
-#endif
-#ifdef CONFIG_DCB
-#define HAVE_PFC_MODE_ENABLE
-#endif /* CONFIG_DCB */
-#endif /* < 2.6.29 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
-#define skb_rx_queue_recorded(a) false
-#define skb_get_rx_queue(a) 0
-#undef CONFIG_FCOE
-#undef CONFIG_FCOE_MODULE
-extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb);
-#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s)
-#define skb_record_rx_queue(a, b) do {} while (0)
-#ifndef CONFIG_PCI_IOV
-#undef pci_enable_sriov
-#define pci_enable_sriov(a, b) -ENOTSUPP
-#undef pci_disable_sriov
-#define pci_disable_sriov(a) do {} while (0)
-#endif /* CONFIG_PCI_IOV */
-#ifndef pr_cont
-#define pr_cont(fmt, ...) \
- printk(KERN_CONT fmt, ##__VA_ARGS__)
-#endif /* pr_cont */
-#else
-#define HAVE_ASPM_QUIRKS
-#endif /* < 2.6.30 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
-#define ETH_P_1588 0x88F7
-#define ETH_P_FIP 0x8914
-#ifndef netdev_uc_count
-#define netdev_uc_count(dev) ((dev)->uc_count)
-#endif
-#ifndef netdev_for_each_uc_addr
-#define netdev_for_each_uc_addr(uclist, dev) \
- for (uclist = dev->uc_list; uclist; uclist = uclist->next)
-#endif
-#else
-#ifndef HAVE_NETDEV_STORAGE_ADDRESS
-#define HAVE_NETDEV_STORAGE_ADDRESS
-#endif
-#ifndef HAVE_NETDEV_HW_ADDR
-#define HAVE_NETDEV_HW_ADDR
-#endif
-#ifndef HAVE_TRANS_START_IN_QUEUE
-#define HAVE_TRANS_START_IN_QUEUE
-#endif
-#endif /* < 2.6.31 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
-#undef netdev_tx_t
-#define netdev_tx_t int
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef NETIF_F_FCOE_MTU
-#define NETIF_F_FCOE_MTU (1 << 26)
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-
-#ifndef pm_runtime_get_sync
-#define pm_runtime_get_sync(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_put
-#define pm_runtime_put(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_put_sync
-#define pm_runtime_put_sync(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_resume
-#define pm_runtime_resume(dev) do {} while (0)
-#endif
-#ifndef pm_schedule_suspend
-#define pm_schedule_suspend(dev, t) do {} while (0)
-#endif
-#ifndef pm_runtime_set_suspended
-#define pm_runtime_set_suspended(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_disable
-#define pm_runtime_disable(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_put_noidle
-#define pm_runtime_put_noidle(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_set_active
-#define pm_runtime_set_active(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_enable
-#define pm_runtime_enable(dev) do {} while (0)
-#endif
-#ifndef pm_runtime_get_noresume
-#define pm_runtime_get_noresume(dev) do {} while (0)
-#endif
-#else /* < 2.6.32 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
-#define HAVE_NETDEV_OPS_FCOE_ENABLE
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#ifdef CONFIG_DCB
-#ifndef HAVE_DCBNL_OPS_GETAPP
-#define HAVE_DCBNL_OPS_GETAPP
-#endif
-#endif /* CONFIG_DCB */
-#include <linux/pm_runtime.h>
-/* IOV bad DMA target workarounds require at least this kernel revision */
-#define HAVE_PCIE_TYPE
-#endif /* < 2.6.32 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
-#ifndef pci_pcie_cap
-#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
-#endif
-#ifndef IPV4_FLOW
-#define IPV4_FLOW 0x10
-#endif /* IPV4_FLOW */
-#ifndef IPV6_FLOW
-#define IPV6_FLOW 0x11
-#endif /* IPV6_FLOW */
-/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
-#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
- (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
-#define HAVE_NETDEV_OPS_FCOE_GETWWN
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#endif /* RHEL6 or SLES11 SP1 */
-#ifndef __percpu
-#define __percpu
-#endif /* __percpu */
-#else /* < 2.6.33 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
-#define HAVE_NETDEV_OPS_FCOE_GETWWN
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#define HAVE_ETHTOOL_SFP_DISPLAY_PORT
-#endif /* < 2.6.33 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
-#ifndef ETH_FLAG_NTUPLE
-#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
-#endif
-
-#ifndef netdev_mc_count
-#define netdev_mc_count(dev) ((dev)->mc_count)
-#endif
-#ifndef netdev_mc_empty
-#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
-#endif
-#ifndef netdev_for_each_mc_addr
-#define netdev_for_each_mc_addr(mclist, dev) \
- for (mclist = dev->mc_list; mclist; mclist = mclist->next)
-#endif
-#ifndef netdev_uc_count
-#define netdev_uc_count(dev) ((dev)->uc.count)
-#endif
-#ifndef netdev_uc_empty
-#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
-#endif
-#ifndef netdev_for_each_uc_addr
-#define netdev_for_each_uc_addr(ha, dev) \
- list_for_each_entry(ha, &dev->uc.list, list)
-#endif
-#ifndef dma_set_coherent_mask
-#define dma_set_coherent_mask(dev,mask) \
- pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
-#endif
-#ifndef pci_dev_run_wake
-#define pci_dev_run_wake(pdev) (0)
-#endif
-
-/* netdev logging taken from include/linux/netdevice.h */
-#ifndef netdev_name
-static inline const char *_kc_netdev_name(const struct net_device *dev)
-{
- if (dev->reg_state != NETREG_REGISTERED)
- return "(unregistered net_device)";
- return dev->name;
-}
-#define netdev_name(netdev) _kc_netdev_name(netdev)
-#endif /* netdev_name */
-
-#undef netdev_printk
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-#define netdev_printk(level, netdev, format, args...) \
-do { \
- struct adapter_struct *kc_adapter = netdev_priv(netdev);\
- struct pci_dev *pdev = kc_adapter->pdev; \
- printk("%s %s: " format, level, pci_name(pdev), \
- ##args); \
-} while(0)
-#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-#define netdev_printk(level, netdev, format, args...) \
-do { \
- struct adapter_struct *kc_adapter = netdev_priv(netdev);\
- struct pci_dev *pdev = kc_adapter->pdev; \
- struct device *dev = pci_dev_to_dev(pdev); \
- dev_printk(level, dev, "%s: " format, \
- netdev_name(netdev), ##args); \
-} while(0)
-#else /* 2.6.21 => 2.6.34 */
-#define netdev_printk(level, netdev, format, args...) \
- dev_printk(level, (netdev)->dev.parent, \
- "%s: " format, \
- netdev_name(netdev), ##args)
-#endif /* <2.6.0 <2.6.21 <2.6.34 */
-#undef netdev_emerg
-#define netdev_emerg(dev, format, args...) \
- netdev_printk(KERN_EMERG, dev, format, ##args)
-#undef netdev_alert
-#define netdev_alert(dev, format, args...) \
- netdev_printk(KERN_ALERT, dev, format, ##args)
-#undef netdev_crit
-#define netdev_crit(dev, format, args...) \
- netdev_printk(KERN_CRIT, dev, format, ##args)
-#undef netdev_err
-#define netdev_err(dev, format, args...) \
- netdev_printk(KERN_ERR, dev, format, ##args)
-#undef netdev_warn
-#define netdev_warn(dev, format, args...) \
- netdev_printk(KERN_WARNING, dev, format, ##args)
-#undef netdev_notice
-#define netdev_notice(dev, format, args...) \
- netdev_printk(KERN_NOTICE, dev, format, ##args)
-#undef netdev_info
-#define netdev_info(dev, format, args...) \
- netdev_printk(KERN_INFO, dev, format, ##args)
-#undef netdev_dbg
-#if defined(DEBUG)
-#define netdev_dbg(__dev, format, args...) \
- netdev_printk(KERN_DEBUG, __dev, format, ##args)
-#elif defined(CONFIG_DYNAMIC_DEBUG)
-#define netdev_dbg(__dev, format, args...) \
-do { \
- dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \
- netdev_name(__dev), ##args); \
-} while (0)
-#else /* DEBUG */
-#define netdev_dbg(__dev, format, args...) \
-({ \
- if (0) \
- netdev_printk(KERN_DEBUG, __dev, format, ##args); \
- 0; \
-})
-#endif /* DEBUG */
-
-#undef netif_printk
-#define netif_printk(priv, type, level, dev, fmt, args...) \
-do { \
- if (netif_msg_##type(priv)) \
- netdev_printk(level, (dev), fmt, ##args); \
-} while (0)
-
-#undef netif_emerg
-#define netif_emerg(priv, type, dev, fmt, args...) \
- netif_level(emerg, priv, type, dev, fmt, ##args)
-#undef netif_alert
-#define netif_alert(priv, type, dev, fmt, args...) \
- netif_level(alert, priv, type, dev, fmt, ##args)
-#undef netif_crit
-#define netif_crit(priv, type, dev, fmt, args...) \
- netif_level(crit, priv, type, dev, fmt, ##args)
-#undef netif_err
-#define netif_err(priv, type, dev, fmt, args...) \
- netif_level(err, priv, type, dev, fmt, ##args)
-#undef netif_warn
-#define netif_warn(priv, type, dev, fmt, args...) \
- netif_level(warn, priv, type, dev, fmt, ##args)
-#undef netif_notice
-#define netif_notice(priv, type, dev, fmt, args...) \
- netif_level(notice, priv, type, dev, fmt, ##args)
-#undef netif_info
-#define netif_info(priv, type, dev, fmt, args...) \
- netif_level(info, priv, type, dev, fmt, ##args)
-
-#ifdef SET_SYSTEM_SLEEP_PM_OPS
-#define HAVE_SYSTEM_SLEEP_PM_OPS
-#endif
-
-#ifndef for_each_set_bit
-#define for_each_set_bit(bit, addr, size) \
- for ((bit) = find_first_bit((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit((addr), (size), (bit) + 1))
-#endif /* for_each_set_bit */
-
-#ifndef DEFINE_DMA_UNMAP_ADDR
-#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
-#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
-#define dma_unmap_addr pci_unmap_addr
-#define dma_unmap_addr_set pci_unmap_addr_set
-#define dma_unmap_len pci_unmap_len
-#define dma_unmap_len_set pci_unmap_len_set
-#endif /* DEFINE_DMA_UNMAP_ADDR */
-#else /* < 2.6.34 */
-#define HAVE_SYSTEM_SLEEP_PM_OPS
-#ifndef HAVE_SET_RX_MODE
-#define HAVE_SET_RX_MODE
-#endif
-
-#endif /* < 2.6.34 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-#ifndef numa_node_id
-#define numa_node_id() 0
-#endif
-#ifdef HAVE_TX_MQ
-#include <net/sch_generic.h>
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
-#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues
-#else /* CONFIG_NETDEVICES_MULTI_QUEUE */
-#define netif_set_real_num_tx_queues(_netdev, _count) \
- do { \
- (_netdev)->egress_subqueue_count = _count; \
- } while (0)
-#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */
-#else
-#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
-#endif /* HAVE_TX_MQ */
-#ifndef ETH_FLAG_RXHASH
-#define ETH_FLAG_RXHASH (1<<28)
-#endif /* ETH_FLAG_RXHASH */
-#else /* < 2.6.35 */
-#define HAVE_PM_QOS_REQUEST_LIST
-#define HAVE_IRQ_AFFINITY_HINT
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
-#define ethtool_op_set_flags _kc_ethtool_op_set_flags
-extern u32 _kc_ethtool_op_get_flags(struct net_device *);
-#define ethtool_op_get_flags _kc_ethtool_op_get_flags
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#ifdef NET_IP_ALIGN
-#undef NET_IP_ALIGN
-#endif
-#define NET_IP_ALIGN 0
-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#ifdef NET_SKB_PAD
-#undef NET_SKB_PAD
-#endif
-
-#if (L1_CACHE_BYTES > 32)
-#define NET_SKB_PAD L1_CACHE_BYTES
-#else
-#define NET_SKB_PAD 32
-#endif
-
-static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
- unsigned int length)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
- if (skb) {
-#if (NET_IP_ALIGN + NET_SKB_PAD)
- skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
-#endif
- skb->dev = dev;
- }
- return skb;
-}
-
-#ifdef netdev_alloc_skb_ip_align
-#undef netdev_alloc_skb_ip_align
-#endif
-#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
-
-#undef netif_level
-#define netif_level(level, priv, type, dev, fmt, args...) \
-do { \
- if (netif_msg_##type(priv)) \
- netdev_##level(dev, fmt, ##args); \
-} while (0)
-
-#undef usleep_range
-#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
-
-#else /* < 2.6.36 */
-#define HAVE_PM_QOS_REQUEST_ACTIVE
-#define HAVE_8021P_SUPPORT
-#define HAVE_NDO_GET_STATS64
-#endif /* < 2.6.36 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
-#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
-#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
-#endif
-#ifndef VLAN_N_VID
-#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN
-#endif /* VLAN_N_VID */
-#ifndef ETH_FLAG_TXVLAN
-#define ETH_FLAG_TXVLAN (1 << 7)
-#endif /* ETH_FLAG_TXVLAN */
-#ifndef ETH_FLAG_RXVLAN
-#define ETH_FLAG_RXVLAN (1 << 8)
-#endif /* ETH_FLAG_RXVLAN */
-
-static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
-{
- WARN_ON(skb->ip_summed != CHECKSUM_NONE);
-}
-#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
-
-static inline void *_kc_vzalloc_node(unsigned long size, int node)
-{
- void *addr = vmalloc_node(size, node);
- if (addr)
- memset(addr, 0, size);
- return addr;
-}
-#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
-
-static inline void *_kc_vzalloc(unsigned long size)
-{
- void *addr = vmalloc(size);
- if (addr)
- memset(addr, 0, size);
- return addr;
-}
-#define vzalloc(_size) _kc_vzalloc(_size)
-
-#ifndef vlan_get_protocol
-static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
-{
- if (vlan_tx_tag_present(skb) ||
- skb->protocol != cpu_to_be16(ETH_P_8021Q))
- return skb->protocol;
-
- if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
- return 0;
-
- return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
-}
-#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
-#endif
-#ifdef HAVE_HW_TIME_STAMP
-#define SKBTX_HW_TSTAMP (1 << 0)
-#define SKBTX_IN_PROGRESS (1 << 2)
-#define SKB_SHARED_TX_IS_UNION
-#endif
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
-#ifndef HAVE_VLAN_RX_REGISTER
-#define HAVE_VLAN_RX_REGISTER
-#endif
-#endif /* > 2.4.18 */
-#endif /* < 2.6.37 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
-#else /* 2.6.22 -> 2.6.37 */
-static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
-{
- return skb->csum_start - skb_headroom(skb);
-}
-#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
-#endif /* 2.6.22 -> 2.6.37 */
-#ifdef CONFIG_DCB
-#ifndef IEEE_8021QAZ_MAX_TCS
-#define IEEE_8021QAZ_MAX_TCS 8
-#endif
-#ifndef DCB_CAP_DCBX_HOST
-#define DCB_CAP_DCBX_HOST 0x01
-#endif
-#ifndef DCB_CAP_DCBX_LLD_MANAGED
-#define DCB_CAP_DCBX_LLD_MANAGED 0x02
-#endif
-#ifndef DCB_CAP_DCBX_VER_CEE
-#define DCB_CAP_DCBX_VER_CEE 0x04
-#endif
-#ifndef DCB_CAP_DCBX_VER_IEEE
-#define DCB_CAP_DCBX_VER_IEEE 0x08
-#endif
-#ifndef DCB_CAP_DCBX_STATIC
-#define DCB_CAP_DCBX_STATIC 0x10
-#endif
-#endif /* CONFIG_DCB */
-#else /* < 2.6.38 */
-#endif /* < 2.6.38 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#ifndef skb_queue_reverse_walk_safe
-#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
- for (skb = (queue)->prev, tmp = skb->prev; \
- skb != (struct sk_buff *)(queue); \
- skb = tmp, tmp = skb->prev)
-#endif
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
-extern u8 _kc_netdev_get_num_tc(struct net_device *dev);
-#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev)
-extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up);
-#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up)
-#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0)
-#else /* RHEL6.1 or greater */
-#ifndef HAVE_MQPRIO
-#define HAVE_MQPRIO
-#endif /* HAVE_MQPRIO */
-#ifdef CONFIG_DCB
-#ifndef HAVE_DCBNL_IEEE
-#define HAVE_DCBNL_IEEE
-#ifndef IEEE_8021QAZ_TSA_STRICT
-#define IEEE_8021QAZ_TSA_STRICT 0
-#endif
-#ifndef IEEE_8021QAZ_TSA_ETS
-#define IEEE_8021QAZ_TSA_ETS 2
-#endif
-#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE
-#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1
-#endif
-#endif
-#endif /* CONFIG_DCB */
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
-#else /* < 2.6.39 */
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
-#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
-#endif
-#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
-#ifndef HAVE_MQPRIO
-#define HAVE_MQPRIO
-#endif
-#ifndef HAVE_SETUP_TC
-#define HAVE_SETUP_TC
-#endif
-#ifdef CONFIG_DCB
-#ifndef HAVE_DCBNL_IEEE
-#define HAVE_DCBNL_IEEE
-#endif
-#endif /* CONFIG_DCB */
-#ifndef HAVE_NDO_SET_FEATURES
-#define HAVE_NDO_SET_FEATURES
-#endif
-#endif /* < 2.6.39 */
-
-/*****************************************************************************/
-/* use < 2.6.40 because a Fedora 15 kernel update changed the kernel
- * version to 2.6.40.x and back-ported 3.0 features such as
- * set_phys_id for ethtool.
- */
-#undef ETHTOOL_GRXRINGS
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
-#ifdef ETHTOOL_GRXRINGS
-#ifndef FLOW_EXT
-#define FLOW_EXT 0x80000000
-union _kc_ethtool_flow_union {
- struct ethtool_tcpip4_spec tcp_ip4_spec;
- struct ethtool_usrip4_spec usr_ip4_spec;
- __u8 hdata[60];
-};
-struct _kc_ethtool_flow_ext {
- __be16 vlan_etype;
- __be16 vlan_tci;
- __be32 data[2];
-};
-struct _kc_ethtool_rx_flow_spec {
- __u32 flow_type;
- union _kc_ethtool_flow_union h_u;
- struct _kc_ethtool_flow_ext h_ext;
- union _kc_ethtool_flow_union m_u;
- struct _kc_ethtool_flow_ext m_ext;
- __u64 ring_cookie;
- __u32 location;
-};
-#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
-#endif /* FLOW_EXT */
-#endif
-
-#define pci_disable_link_state_locked pci_disable_link_state
-
-#ifndef PCI_LTR_VALUE_MASK
-#define PCI_LTR_VALUE_MASK 0x000003ff
-#endif
-#ifndef PCI_LTR_SCALE_MASK
-#define PCI_LTR_SCALE_MASK 0x00001c00
-#endif
-#ifndef PCI_LTR_SCALE_SHIFT
-#define PCI_LTR_SCALE_SHIFT 10
-#endif
-
-#else /* < 2.6.40 */
-#define HAVE_ETHTOOL_SET_PHYS_ID
-#endif /* < 2.6.40 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
-#ifndef __netdev_alloc_skb_ip_align
-#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
-#endif /* __netdev_alloc_skb_ip_align */
-#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
-#define dcb_ieee_delapp(dev, app) 0
-#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
-#else /* < 3.1.0 */
-#ifndef HAVE_DCBNL_IEEE_DELAPP
-#define HAVE_DCBNL_IEEE_DELAPP
-#endif
-#endif /* < 3.1.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
-#ifdef ETHTOOL_GRXRINGS
-#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
-#endif /* ETHTOOL_GRXRINGS */
-
-#ifndef skb_frag_size
-#define skb_frag_size(frag) _kc_skb_frag_size(frag)
-static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
-{
- return frag->size;
-}
-#endif /* skb_frag_size */
-
-#ifndef skb_frag_size_sub
-#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta)
-static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
-{
- frag->size -= delta;
-}
-#endif /* skb_frag_size_sub */
-
-#ifndef skb_frag_page
-#define skb_frag_page(frag) _kc_skb_frag_page(frag)
-static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
-{
- return frag->page;
-}
-#endif /* skb_frag_page */
-
-#ifndef skb_frag_address
-#define skb_frag_address(frag) _kc_skb_frag_address(frag)
-static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
-{
- return page_address(skb_frag_page(frag)) + frag->page_offset;
-}
-#endif /* skb_frag_address */
-
-#ifndef skb_frag_dma_map
-#define skb_frag_dma_map(dev,frag,offset,size,dir) \
- _kc_skb_frag_dma_map(dev,frag,offset,size,dir)
-static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
- const skb_frag_t *frag,
- size_t offset, size_t size,
- enum dma_data_direction dir)
-{
- return dma_map_page(dev, skb_frag_page(frag),
- frag->page_offset + offset, size, dir);
-}
-#endif /* skb_frag_dma_map */
-
-#ifndef __skb_frag_unref
-#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
-static inline void __kc_skb_frag_unref(skb_frag_t *frag)
-{
- put_page(skb_frag_page(frag));
-}
-#endif /* __skb_frag_unref */
-#else /* < 3.2.0 */
-#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
-#define HAVE_PCI_DEV_FLAGS_ASSIGNED
-#define HAVE_VF_SPOOFCHK_CONFIGURE
-#endif
-#endif /* < 3.2.0 */
-
-#if (RHEL_RELEASE_CODE && \
- (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \
- (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
-#undef ixgbe_get_netdev_tc_txq
-#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
-#endif
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
-typedef u32 kni_netdev_features_t;
-#else /* ! < 3.3.0 */
-typedef netdev_features_t kni_netdev_features_t;
-#define HAVE_INT_NDO_VLAN_RX_ADD_VID
-#ifdef ETHTOOL_SRXNTUPLE
-#undef ETHTOOL_SRXNTUPLE
-#endif
-#endif /* < 3.3.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-#ifndef NETIF_F_RXFCS
-#define NETIF_F_RXFCS 0
-#endif /* NETIF_F_RXFCS */
-#ifndef NETIF_F_RXALL
-#define NETIF_F_RXALL 0
-#endif /* NETIF_F_RXALL */
-
-#define NUMTCS_RETURNS_U8
-
-
-#endif /* < 3.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
-static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
-{
- return !compare_ether_addr(addr1, addr2);
-}
-#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
-#else
-#define HAVE_FDB_OPS
-#endif /* < 3.5.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) )
-#define NETIF_F_HW_VLAN_TX NETIF_F_HW_VLAN_CTAG_TX
-#define NETIF_F_HW_VLAN_RX NETIF_F_HW_VLAN_CTAG_RX
-#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER
-#endif /* >= 3.10.0 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
-#else
-static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
- return 0;
-}
-#endif
-#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
-
-#endif
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) )
-#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
-#endif /* >= 3.16.0 */
-
-/*
- * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
- * For older kernels that back-ported this commit, the renamed functions must be used.
- * This fix is specific to RedHat/CentOS kernels.
- */
-#if (defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))
-#define vlan_tx_tag_get skb_vlan_tag_get
-#define vlan_tx_tag_present skb_vlan_tag_present
-#endif
-
-#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
deleted file mode 100644
index c9393d89..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_dev.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright(c) 2010-2014 Intel Corporation.
- */
-
-#ifndef _KNI_DEV_H_
-#define _KNI_DEV_H_
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "compat.h"
-
-#include <linux/if.h>
-#include <linux/wait.h>
-#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
-#include <linux/sched/signal.h>
-#else
-#include <linux/sched.h>
-#endif
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
-#include <exec-env/rte_kni_common.h>
-#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
-
-#define MBUF_BURST_SZ 32
-
-/**
- * A structure describing the private information for a kni device.
- */
-struct kni_dev {
- /* kni list */
- struct list_head list;
-
- struct net_device_stats stats;
- int status;
- uint16_t group_id; /* Group ID of a group of KNI devices */
- uint32_t core_id; /* Core ID to bind */
- char name[RTE_KNI_NAMESIZE]; /* Network device name */
- struct task_struct *pthread;
-
- /* wait queue for req/resp */
- wait_queue_head_t wq;
- struct mutex sync_lock;
-
- /* PCI device id */
- uint16_t device_id;
-
- /* kni device */
- struct net_device *net_dev;
- struct net_device *lad_dev;
- struct pci_dev *pci_dev;
-
- /* queue for packets to be sent out */
- void *tx_q;
-
- /* queue for the packets received */
- void *rx_q;
-
-	/* queue for the allocated mbufs that can be used to store sk_buffs */
- void *alloc_q;
-
- /* free queue for the mbufs to be freed */
- void *free_q;
-
- /* request queue */
- void *req_q;
-
- /* response queue */
- void *resp_q;
-
- void *sync_kva;
- void *sync_va;
-
- void *mbuf_kva;
- void *mbuf_va;
-
- /* mbuf size */
- uint32_t mbuf_size;
-
- /* synchro for request processing */
- unsigned long synchro;
-
- /* buffers */
- void *pa[MBUF_BURST_SZ];
- void *va[MBUF_BURST_SZ];
- void *alloc_pa[MBUF_BURST_SZ];
- void *alloc_va[MBUF_BURST_SZ];
-};
-
-void kni_net_rx(struct kni_dev *kni);
-void kni_net_init(struct net_device *dev);
-void kni_net_config_lo_mode(char *lo_str);
-void kni_net_poll_resp(struct kni_dev *kni);
-void kni_set_ethtool_ops(struct net_device *netdev);
-
-int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-void ixgbe_kni_remove(struct pci_dev *pdev);
-int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-void igb_kni_remove(struct pci_dev *pdev);
-
-#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
deleted file mode 100644
index a44e7d94..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_ethtool.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright(c) 2010-2014 Intel Corporation.
- */
-
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include "kni_dev.h"
-
-static int
-kni_check_if_running(struct net_device *dev)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- if (priv->lad_dev)
- return 0;
- else
- return -EOPNOTSUPP;
-}
-
-static void
-kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
-}
-
-static int
-kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
-}
-
-static int
-kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
-}
-
-static void
-kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
-}
-
-static int
-kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
-}
-
-static int
-kni_nway_reset(struct net_device *dev)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
-}
-
-static int
-kni_get_eeprom_len(struct net_device *dev)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
-}
-
-static int
-kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
- u8 *bytes)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
- bytes);
-}
-
-static int
-kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
- u8 *bytes)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
- bytes);
-}
-
-static void
-kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
-}
-
-static int
-kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
-}
-
-static void
-kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
-}
-
-static int
-kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
- pause);
-}
-
-static u32
-kni_get_msglevel(struct net_device *dev)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
-}
-
-static void
-kni_set_msglevel(struct net_device *dev, u32 data)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
-}
-
-static int
-kni_get_regs_len(struct net_device *dev)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
-}
-
-static void
-kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
-}
-
-static void
-kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
- data);
-}
-
-static int
-kni_get_sset_count(struct net_device *dev, int sset)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
-}
-
-static void
-kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
- u64 *data)
-{
- struct kni_dev *priv = netdev_priv(dev);
-
- priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
- data);
-}
-
-struct ethtool_ops kni_ethtool_ops = {
- .begin = kni_check_if_running,
- .get_drvinfo = kni_get_drvinfo,
- .get_settings = kni_get_settings,
- .set_settings = kni_set_settings,
- .get_regs_len = kni_get_regs_len,
- .get_regs = kni_get_regs,
- .get_wol = kni_get_wol,
- .set_wol = kni_set_wol,
- .nway_reset = kni_nway_reset,
- .get_link = ethtool_op_get_link,
- .get_eeprom_len = kni_get_eeprom_len,
- .get_eeprom = kni_get_eeprom,
- .set_eeprom = kni_set_eeprom,
- .get_ringparam = kni_get_ringparam,
- .set_ringparam = kni_set_ringparam,
- .get_pauseparam = kni_get_pauseparam,
- .set_pauseparam = kni_set_pauseparam,
- .get_msglevel = kni_get_msglevel,
- .set_msglevel = kni_set_msglevel,
- .get_strings = kni_get_strings,
- .get_sset_count = kni_get_sset_count,
- .get_ethtool_stats = kni_get_ethtool_stats,
-};
-
-void
-kni_set_ethtool_ops(struct net_device *netdev)
-{
- netdev->ethtool_ops = &kni_ethtool_ops;
-}
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
deleted file mode 100644
index 9a4762de..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_fifo.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright(c) 2010-2014 Intel Corporation.
- */
-
-#ifndef _KNI_FIFO_H_
-#define _KNI_FIFO_H_
-
-#include <exec-env/rte_kni_common.h>
-
-/**
- * Add up to num elements to the fifo. Return the number actually written
- */
-static inline uint32_t
-kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
-{
- uint32_t i = 0;
- uint32_t fifo_write = fifo->write;
- uint32_t fifo_read = fifo->read;
- uint32_t new_write = fifo_write;
-
- for (i = 0; i < num; i++) {
- new_write = (new_write + 1) & (fifo->len - 1);
-
- if (new_write == fifo_read)
- break;
- fifo->buffer[fifo_write] = data[i];
- fifo_write = new_write;
- }
- fifo->write = fifo_write;
-
- return i;
-}
-
-/**
- * Get up to num elements from the fifo. Return the number actually read
- */
-static inline uint32_t
-kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
-{
- uint32_t i = 0;
- uint32_t new_read = fifo->read;
- uint32_t fifo_write = fifo->write;
-
- for (i = 0; i < num; i++) {
- if (new_read == fifo_write)
- break;
-
- data[i] = fifo->buffer[new_read];
- new_read = (new_read + 1) & (fifo->len - 1);
- }
- fifo->read = new_read;
-
- return i;
-}
-
-/**
- * Get the number of elements in the fifo
- */
-static inline uint32_t
-kni_fifo_count(struct rte_kni_fifo *fifo)
-{
- return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
-}
-
-/**
- * Get the number of available slots in the fifo
- */
-static inline uint32_t
-kni_fifo_free_count(struct rte_kni_fifo *fifo)
-{
- return (fifo->read - fifo->write - 1) & (fifo->len - 1);
-}
-
-#endif /* _KNI_FIFO_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
deleted file mode 100644
index 01574ecf..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ /dev/null
@@ -1,663 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright(c) 2010-2014 Intel Corporation.
- */
-
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/kthread.h>
-#include <linux/rwsem.h>
-#include <linux/mutex.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-
-#include <exec-env/rte_kni_common.h>
-
-#include "compat.h"
-#include "kni_dev.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kernel Module for managing kni devices");
-
-#define KNI_RX_LOOP_NUM 1000
-
-#define KNI_MAX_DEVICES 32
-
-extern const struct pci_device_id ixgbe_pci_tbl[];
-extern const struct pci_device_id igb_pci_tbl[];
-
-/* loopback mode */
-static char *lo_mode;
-
-/* Kernel thread mode */
-static char *kthread_mode;
-static uint32_t multiple_kthread_on;
-
-#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
-
-static int kni_net_id;
-
-struct kni_net {
- unsigned long device_in_use; /* device in use flag */
- struct mutex kni_kthread_lock;
- struct task_struct *kni_kthread;
- struct rw_semaphore kni_list_lock;
- struct list_head kni_list_head;
-};
-
-static int __net_init
-kni_init_net(struct net *net)
-{
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- struct kni_net *knet = net_generic(net, kni_net_id);
-
- memset(knet, 0, sizeof(*knet));
-#else
- struct kni_net *knet;
- int ret;
-
- knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
- if (!knet) {
- ret = -ENOMEM;
- return ret;
- }
-#endif
-
- /* Clear the bit of device in use */
- clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
-
- mutex_init(&knet->kni_kthread_lock);
-
- init_rwsem(&knet->kni_list_lock);
- INIT_LIST_HEAD(&knet->kni_list_head);
-
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- return 0;
-#else
- ret = net_assign_generic(net, kni_net_id, knet);
- if (ret < 0)
- kfree(knet);
-
- return ret;
-#endif
-}
-
-static void __net_exit
-kni_exit_net(struct net *net)
-{
- struct kni_net *knet __maybe_unused;
-
- knet = net_generic(net, kni_net_id);
- mutex_destroy(&knet->kni_kthread_lock);
-
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- kfree(knet);
-#endif
-}
-
-static struct pernet_operations kni_net_ops = {
- .init = kni_init_net,
- .exit = kni_exit_net,
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- .id = &kni_net_id,
- .size = sizeof(struct kni_net),
-#endif
-};
-
-static int
-kni_thread_single(void *data)
-{
- struct kni_net *knet = data;
- int j;
- struct kni_dev *dev;
-
- while (!kthread_should_stop()) {
- down_read(&knet->kni_list_lock);
- for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
- list_for_each_entry(dev, &knet->kni_list_head, list) {
- kni_net_rx(dev);
- kni_net_poll_resp(dev);
- }
- }
- up_read(&knet->kni_list_lock);
-#ifdef RTE_KNI_PREEMPT_DEFAULT
- /* reschedule out for a while */
- schedule_timeout_interruptible(
- usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
- }
-
- return 0;
-}
-
-static int
-kni_thread_multiple(void *param)
-{
- int j;
- struct kni_dev *dev = param;
-
- while (!kthread_should_stop()) {
- for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
- kni_net_rx(dev);
- kni_net_poll_resp(dev);
- }
-#ifdef RTE_KNI_PREEMPT_DEFAULT
- schedule_timeout_interruptible(
- usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
- }
-
- return 0;
-}
-
-static int
-kni_open(struct inode *inode, struct file *file)
-{
- struct net *net = current->nsproxy->net_ns;
- struct kni_net *knet = net_generic(net, kni_net_id);
-
- /* kni device can be opened by one user only per netns */
- if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
- return -EBUSY;
-
- file->private_data = get_net(net);
- pr_debug("/dev/kni opened\n");
-
- return 0;
-}
-
-static int
-kni_dev_remove(struct kni_dev *dev)
-{
- if (!dev)
- return -ENODEV;
-
-#ifdef RTE_KNI_KMOD_ETHTOOL
- if (dev->pci_dev) {
- if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
- ixgbe_kni_remove(dev->pci_dev);
- else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
- igb_kni_remove(dev->pci_dev);
- }
-#endif
-
- if (dev->net_dev) {
- unregister_netdev(dev->net_dev);
- free_netdev(dev->net_dev);
- }
-
- return 0;
-}
-
-static int
-kni_release(struct inode *inode, struct file *file)
-{
- struct net *net = file->private_data;
- struct kni_net *knet = net_generic(net, kni_net_id);
- struct kni_dev *dev, *n;
-
- /* Stop kernel thread for single mode */
- if (multiple_kthread_on == 0) {
- mutex_lock(&knet->kni_kthread_lock);
- /* Stop kernel thread */
- if (knet->kni_kthread != NULL) {
- kthread_stop(knet->kni_kthread);
- knet->kni_kthread = NULL;
- }
- mutex_unlock(&knet->kni_kthread_lock);
- }
-
- down_write(&knet->kni_list_lock);
- list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
- /* Stop kernel thread for multiple mode */
- if (multiple_kthread_on && dev->pthread != NULL) {
- kthread_stop(dev->pthread);
- dev->pthread = NULL;
- }
-
- kni_dev_remove(dev);
- list_del(&dev->list);
- }
- up_write(&knet->kni_list_lock);
-
- /* Clear the bit of device in use */
- clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
-
- put_net(net);
- pr_debug("/dev/kni closed\n");
-
- return 0;
-}
-
-static int
-kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
-{
- if (!kni || !dev)
- return -1;
-
- /* Check if network name has been used */
- if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
- pr_err("KNI name %s duplicated\n", dev->name);
- return -1;
- }
-
- return 0;
-}
-
-static int
-kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
-{
- /**
- * In multiple-kthread mode, create a new kernel thread for this device,
- * set its core affinity and wake it up; in single mode, start the shared
- * kernel thread if it is not running yet.
- */
- if (multiple_kthread_on) {
- kni->pthread = kthread_create(kni_thread_multiple,
- (void *)kni, "kni_%s", kni->name);
- if (IS_ERR(kni->pthread)) {
- kni_dev_remove(kni);
- return -ECANCELED;
- }
-
- if (force_bind)
- kthread_bind(kni->pthread, kni->core_id);
- wake_up_process(kni->pthread);
- } else {
- mutex_lock(&knet->kni_kthread_lock);
-
- if (knet->kni_kthread == NULL) {
- knet->kni_kthread = kthread_create(kni_thread_single,
- (void *)knet, "kni_single");
- if (IS_ERR(knet->kni_kthread)) {
- mutex_unlock(&knet->kni_kthread_lock);
- kni_dev_remove(kni);
- return -ECANCELED;
- }
-
- if (force_bind)
- kthread_bind(knet->kni_kthread, kni->core_id);
- wake_up_process(knet->kni_kthread);
- }
-
- mutex_unlock(&knet->kni_kthread_lock);
- }
-
- return 0;
-}
-
-static int
-kni_ioctl_create(struct net *net, uint32_t ioctl_num,
- unsigned long ioctl_param)
-{
- struct kni_net *knet = net_generic(net, kni_net_id);
- int ret;
- struct rte_kni_device_info dev_info;
- struct net_device *net_dev = NULL;
- struct kni_dev *kni, *dev, *n;
-#ifdef RTE_KNI_KMOD_ETHTOOL
- struct pci_dev *found_pci = NULL;
- struct net_device *lad_dev = NULL;
- struct pci_dev *pci = NULL;
-#endif
-
- pr_info("Creating kni...\n");
- /* Check the buffer size, to avoid warning */
- if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
- return -EINVAL;
-
- /* Copy kni info from user space */
- ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
- if (ret) {
- pr_err("copy_from_user in kni_ioctl_create");
- return -EIO;
- }
-
- /* Check if name is zero-terminated */
- if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
- pr_err("kni.name not zero-terminated");
- return -EINVAL;
- }
-
- /**
- * Check if the cpu core id is valid for binding.
- */
- if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
- pr_err("cpu %u is not online\n", dev_info.core_id);
- return -EINVAL;
- }
-
- /* Check if it has been created */
- down_read(&knet->kni_list_lock);
- list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
- if (kni_check_param(dev, &dev_info) < 0) {
- up_read(&knet->kni_list_lock);
- return -EINVAL;
- }
- }
- up_read(&knet->kni_list_lock);
-
- net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
-#ifdef NET_NAME_USER
- NET_NAME_USER,
-#endif
- kni_net_init);
- if (net_dev == NULL) {
- pr_err("error allocating device \"%s\"\n", dev_info.name);
- return -EBUSY;
- }
-
- dev_net_set(net_dev, net);
-
- kni = netdev_priv(net_dev);
-
- kni->net_dev = net_dev;
- kni->group_id = dev_info.group_id;
- kni->core_id = dev_info.core_id;
- strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
-
- /* Translate user space info into kernel space info */
- kni->tx_q = phys_to_virt(dev_info.tx_phys);
- kni->rx_q = phys_to_virt(dev_info.rx_phys);
- kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
- kni->free_q = phys_to_virt(dev_info.free_phys);
-
- kni->req_q = phys_to_virt(dev_info.req_phys);
- kni->resp_q = phys_to_virt(dev_info.resp_phys);
- kni->sync_va = dev_info.sync_va;
- kni->sync_kva = phys_to_virt(dev_info.sync_phys);
-
- kni->mbuf_size = dev_info.mbuf_size;
-
- pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
- (unsigned long long) dev_info.tx_phys, kni->tx_q);
- pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
- (unsigned long long) dev_info.rx_phys, kni->rx_q);
- pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
- (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
- pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
- (unsigned long long) dev_info.free_phys, kni->free_q);
- pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
- (unsigned long long) dev_info.req_phys, kni->req_q);
- pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
- (unsigned long long) dev_info.resp_phys, kni->resp_q);
- pr_debug("mbuf_size: %u\n", kni->mbuf_size);
-
- pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
- dev_info.bus,
- dev_info.devid,
- dev_info.function,
- dev_info.vendor_id,
- dev_info.device_id);
-#ifdef RTE_KNI_KMOD_ETHTOOL
- pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
-
- /* Support Ethtool */
- while (pci) {
- pr_debug("pci_bus: %02x:%02x:%02x\n",
- pci->bus->number,
- PCI_SLOT(pci->devfn),
- PCI_FUNC(pci->devfn));
-
- if ((pci->bus->number == dev_info.bus) &&
- (PCI_SLOT(pci->devfn) == dev_info.devid) &&
- (PCI_FUNC(pci->devfn) == dev_info.function)) {
- found_pci = pci;
-
- if (pci_match_id(ixgbe_pci_tbl, found_pci))
- ret = ixgbe_kni_probe(found_pci, &lad_dev);
- else if (pci_match_id(igb_pci_tbl, found_pci))
- ret = igb_kni_probe(found_pci, &lad_dev);
- else
- ret = -1;
-
- pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
- pci, lad_dev);
- if (ret == 0) {
- kni->lad_dev = lad_dev;
- kni_set_ethtool_ops(kni->net_dev);
- } else {
- pr_err("Device not supported by ethtool");
- kni->lad_dev = NULL;
- }
-
- kni->pci_dev = found_pci;
- kni->device_id = dev_info.device_id;
- break;
- }
- pci = pci_get_device(dev_info.vendor_id,
- dev_info.device_id, pci);
- }
- if (pci)
- pci_dev_put(pci);
-#endif
-
- if (kni->lad_dev)
- ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
- else {
- /* if user has provided a valid mac address */
- if (is_valid_ether_addr((unsigned char *)(dev_info.mac_addr)))
- memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
- else
- /*
- * Generate a random MAC address; eth_random_addr() is the
- * newer kernel API for this.
- */
- random_ether_addr(net_dev->dev_addr);
- }
-
- if (dev_info.mtu)
- net_dev->mtu = dev_info.mtu;
-
- ret = register_netdev(net_dev);
- if (ret) {
- pr_err("error %i registering device \"%s\"\n",
- ret, dev_info.name);
- kni->net_dev = NULL;
- kni_dev_remove(kni);
- free_netdev(net_dev);
- return -ENODEV;
- }
-
- ret = kni_run_thread(knet, kni, dev_info.force_bind);
- if (ret != 0)
- return ret;
-
- down_write(&knet->kni_list_lock);
- list_add(&kni->list, &knet->kni_list_head);
- up_write(&knet->kni_list_lock);
-
- return 0;
-}
-
-static int
-kni_ioctl_release(struct net *net, uint32_t ioctl_num,
- unsigned long ioctl_param)
-{
- struct kni_net *knet = net_generic(net, kni_net_id);
- int ret = -EINVAL;
- struct kni_dev *dev, *n;
- struct rte_kni_device_info dev_info;
-
- if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
- return -EINVAL;
-
- ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
- if (ret) {
- pr_err("copy_from_user in kni_ioctl_release");
- return -EIO;
- }
-
- /* Release the network device according to its name */
- if (strlen(dev_info.name) == 0)
- return ret;
-
- down_write(&knet->kni_list_lock);
- list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
- if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
- continue;
-
- if (multiple_kthread_on && dev->pthread != NULL) {
- kthread_stop(dev->pthread);
- dev->pthread = NULL;
- }
-
- kni_dev_remove(dev);
- list_del(&dev->list);
- ret = 0;
- break;
- }
- up_write(&knet->kni_list_lock);
- pr_info("%s release kni named %s\n",
- (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
-
- return ret;
-}
-
-static int
-kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
-{
- int ret = -EINVAL;
- struct net *net = current->nsproxy->net_ns;
-
- pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
-
- /*
- * Switch according to the ioctl called
- */
- switch (_IOC_NR(ioctl_num)) {
- case _IOC_NR(RTE_KNI_IOCTL_TEST):
- /* For test only, not used */
- break;
- case _IOC_NR(RTE_KNI_IOCTL_CREATE):
- ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
- break;
- case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
- ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
- break;
- default:
- pr_debug("IOCTL default\n");
- break;
- }
-
- return ret;
-}
-
-static int
-kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
- unsigned long ioctl_param)
-{
- /* 32-bit app on 64-bit OS to be supported later */
- pr_debug("Not implemented.\n");
-
- return -EINVAL;
-}
-
-static const struct file_operations kni_fops = {
- .owner = THIS_MODULE,
- .open = kni_open,
- .release = kni_release,
- .unlocked_ioctl = (void *)kni_ioctl,
- .compat_ioctl = (void *)kni_compat_ioctl,
-};
-
-static struct miscdevice kni_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = KNI_DEVICE,
- .fops = &kni_fops,
-};
-
-static int __init
-kni_parse_kthread_mode(void)
-{
- if (!kthread_mode)
- return 0;
-
- if (strcmp(kthread_mode, "single") == 0)
- return 0;
- else if (strcmp(kthread_mode, "multiple") == 0)
- multiple_kthread_on = 1;
- else
- return -1;
-
- return 0;
-}
-
-static int __init
-kni_init(void)
-{
- int rc;
-
- if (kni_parse_kthread_mode() < 0) {
- pr_err("Invalid parameter for kthread_mode\n");
- return -EINVAL;
- }
-
- if (multiple_kthread_on == 0)
- pr_debug("Single kernel thread for all KNI devices\n");
- else
- pr_debug("Multiple kernel thread mode enabled\n");
-
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- rc = register_pernet_subsys(&kni_net_ops);
-#else
- rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif
- if (rc)
- return -EPERM;
-
- rc = misc_register(&kni_misc);
- if (rc != 0) {
- pr_err("Misc registration failed\n");
- goto out;
- }
-
- /* Configure the lo mode according to the input parameter */
- kni_net_config_lo_mode(lo_mode);
-
- return 0;
-
-out:
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- unregister_pernet_subsys(&kni_net_ops);
-#else
- unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
-#endif
- return rc;
-}
-
-static void __exit
-kni_exit(void)
-{
- misc_deregister(&kni_misc);
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- unregister_pernet_subsys(&kni_net_ops);
-#else
- unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
-#endif
-}
-
-module_init(kni_init);
-module_exit(kni_exit);
-
-module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(lo_mode,
-"KNI loopback mode (default=lo_mode_none):\n"
-" lo_mode_none Kernel loopback disabled\n"
-" lo_mode_fifo Enable kernel loopback with fifo\n"
-" lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
-"\n"
-);
-
-module_param(kthread_mode, charp, S_IRUGO);
-MODULE_PARM_DESC(kthread_mode,
-"Kernel thread mode (default=single):\n"
-" single Single kernel thread mode enabled.\n"
-" multiple Multiple kernel thread mode enabled.\n"
-"\n"
-);
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
deleted file mode 100644
index 9f9b798c..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ /dev/null
@@ -1,757 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright(c) 2010-2014 Intel Corporation.
- */
-
-/*
- * This code is inspired by the book "Linux Device Drivers" by
- * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
- */
-
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h> /* eth_type_trans */
-#include <linux/skbuff.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-
-#include <exec-env/rte_kni_common.h>
-#include <kni_fifo.h>
-
-#include "compat.h"
-#include "kni_dev.h"
-
-#define WD_TIMEOUT 5 /* jiffies */
-
-#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
-
-/* typedef for rx function */
-typedef void (*kni_net_rx_t)(struct kni_dev *kni);
-
-static void kni_net_rx_normal(struct kni_dev *kni);
-
-/* kni rx function pointer, with default to normal rx */
-static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
-
-/* physical address to kernel virtual address */
-static void *
-pa2kva(void *pa)
-{
- return phys_to_virt((unsigned long)pa);
-}
-
-/* physical address to virtual address */
-static void *
-pa2va(void *pa, struct rte_kni_mbuf *m)
-{
- void *va;
-
- va = (void *)((unsigned long)pa +
- (unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr);
- return va;
-}
-
-/* mbuf data kernel virtual address from mbuf kernel virtual address */
-static void *
-kva2data_kva(struct rte_kni_mbuf *m)
-{
- return phys_to_virt(m->buf_physaddr + m->data_off);
-}
-
-/* virtual address to physical address */
-static void *
-va2pa(void *va, struct rte_kni_mbuf *m)
-{
- void *pa;
-
- pa = (void *)((unsigned long)va -
- ((unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr));
- return pa;
-}
-
-/*
- * Send a request to user space and wait for the response.
- */
-static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
-{
- int ret = -1;
- void *resp_va;
- uint32_t num;
- int ret_val;
-
- if (!kni || !req) {
- pr_err("No kni instance or request\n");
- return -EINVAL;
- }
-
- mutex_lock(&kni->sync_lock);
-
- /* Construct data */
- memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
- num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
- if (num < 1) {
- pr_err("Cannot send to req_q\n");
- ret = -EBUSY;
- goto fail;
- }
-
- ret_val = wait_event_interruptible_timeout(kni->wq,
- kni_fifo_count(kni->resp_q), 3 * HZ);
- if (signal_pending(current) || ret_val <= 0) {
- ret = -ETIME;
- goto fail;
- }
- num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
- if (num != 1 || resp_va != kni->sync_va) {
- /* This should never happen */
- pr_err("No data in resp_q\n");
- ret = -ENODATA;
- goto fail;
- }
-
- memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
- ret = 0;
-
-fail:
- mutex_unlock(&kni->sync_lock);
- return ret;
-}
-
-/*
- * Open and close
- */
-static int
-kni_net_open(struct net_device *dev)
-{
- int ret;
- struct rte_kni_request req;
- struct kni_dev *kni = netdev_priv(dev);
-
- netif_start_queue(dev);
-
- memset(&req, 0, sizeof(req));
- req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
-
- /* Setting if_up to non-zero means up */
- req.if_up = 1;
- ret = kni_net_process_request(kni, &req);
-
- return (ret == 0) ? req.result : ret;
-}
-
-static int
-kni_net_release(struct net_device *dev)
-{
- int ret;
- struct rte_kni_request req;
- struct kni_dev *kni = netdev_priv(dev);
-
- netif_stop_queue(dev); /* can't transmit any more */
-
- memset(&req, 0, sizeof(req));
- req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
-
- /* Setting if_up to 0 means down */
- req.if_up = 0;
- ret = kni_net_process_request(kni, &req);
-
- return (ret == 0) ? req.result : ret;
-}
-
-/*
- * Configuration changes (passed on by ifconfig)
- */
-static int
-kni_net_config(struct net_device *dev, struct ifmap *map)
-{
- if (dev->flags & IFF_UP) /* can't act on a running interface */
- return -EBUSY;
-
- /* ignore other fields */
- return 0;
-}
-
-/*
- * Transmit a packet (called by the kernel)
- */
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
- int len = 0;
- uint32_t ret;
- struct kni_dev *kni = netdev_priv(dev);
- struct rte_kni_mbuf *pkt_kva = NULL;
- void *pkt_pa = NULL;
- void *pkt_va = NULL;
-
- /* save the timestamp */
-#ifdef HAVE_TRANS_START_HELPER
- netif_trans_update(dev);
-#else
- dev->trans_start = jiffies;
-#endif
-
- /* Check if the length of skb is less than mbuf size */
- if (skb->len > kni->mbuf_size)
- goto drop;
-
- /**
- * Check if it has at least one free entry in tx_q and
- * one entry in alloc_q.
- */
- if (kni_fifo_free_count(kni->tx_q) == 0 ||
- kni_fifo_count(kni->alloc_q) == 0) {
- /**
- * If no free entry in tx_q or no entry in alloc_q,
- * drop the skb and return.
- */
- goto drop;
- }
-
- /* dequeue a mbuf from alloc_q */
- ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
- if (likely(ret == 1)) {
- void *data_kva;
-
- pkt_kva = pa2kva(pkt_pa);
- data_kva = kva2data_kva(pkt_kva);
- pkt_va = pa2va(pkt_pa, pkt_kva);
-
- len = skb->len;
- memcpy(data_kva, skb->data, len);
- if (unlikely(len < ETH_ZLEN)) {
- memset(data_kva + len, 0, ETH_ZLEN - len);
- len = ETH_ZLEN;
- }
- pkt_kva->pkt_len = len;
- pkt_kva->data_len = len;
-
- /* enqueue mbuf into tx_q */
- ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
- if (unlikely(ret != 1)) {
- /* Failing should not happen */
- pr_err("Fail to enqueue mbuf into tx_q\n");
- goto drop;
- }
- } else {
- /* Failing should not happen */
- pr_err("Fail to dequeue mbuf from alloc_q\n");
- goto drop;
- }
-
- /* Free skb and update statistics */
- dev_kfree_skb(skb);
- kni->stats.tx_bytes += len;
- kni->stats.tx_packets++;
-
- return NETDEV_TX_OK;
-
-drop:
- /* Free skb and update statistics */
- dev_kfree_skb(skb);
- kni->stats.tx_dropped++;
-
- return NETDEV_TX_OK;
-}
-
-/*
- * RX: normal working mode
- */
-static void
-kni_net_rx_normal(struct kni_dev *kni)
-{
- uint32_t ret;
- uint32_t len;
- uint32_t i, num_rx, num_fq;
- struct rte_kni_mbuf *kva;
- void *data_kva;
- struct sk_buff *skb;
- struct net_device *dev = kni->net_dev;
-
- /* Get the number of free entries in free_q */
- num_fq = kni_fifo_free_count(kni->free_q);
- if (num_fq == 0) {
- /* No room on the free_q, bail out */
- return;
- }
-
- /* Calculate the number of entries to dequeue from rx_q */
- num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
-
- /* Burst dequeue from rx_q */
- num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
- if (num_rx == 0)
- return;
-
- /* Transfer received packets to netif */
- for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
- len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
- kni->va[i] = pa2va(kni->pa[i], kva);
-
- skb = dev_alloc_skb(len + 2);
- if (!skb) {
- /* Update statistics */
- kni->stats.rx_dropped++;
- continue;
- }
-
- /* Align IP on 16B boundary */
- skb_reserve(skb, 2);
-
- if (kva->nb_segs == 1) {
- memcpy(skb_put(skb, len), data_kva, len);
- } else {
- int nb_segs;
- int kva_nb_segs = kva->nb_segs;
-
- for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
- memcpy(skb_put(skb, kva->data_len),
- data_kva, kva->data_len);
-
- if (!kva->next)
- break;
-
- kva = pa2kva(va2pa(kva->next, kva));
- data_kva = kva2data_kva(kva);
- }
- }
-
- skb->dev = dev;
- skb->protocol = eth_type_trans(skb, dev);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* Call netif interface */
- netif_rx_ni(skb);
-
- /* Update statistics */
- kni->stats.rx_bytes += len;
- kni->stats.rx_packets++;
- }
-
- /* Burst enqueue mbufs into free_q */
- ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
- if (ret != num_rx)
- /* Failing should not happen */
- pr_err("Fail to enqueue entries into free_q\n");
-}
-
-/*
- * RX: loopback with enqueue/dequeue fifos.
- */
-static void
-kni_net_rx_lo_fifo(struct kni_dev *kni)
-{
- uint32_t ret;
- uint32_t len;
- uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
- struct rte_kni_mbuf *kva;
- void *data_kva;
- struct rte_kni_mbuf *alloc_kva;
- void *alloc_data_kva;
-
- /* Get the number of entries in rx_q */
- num_rq = kni_fifo_count(kni->rx_q);
-
- /* Get the number of free entries in tx_q */
- num_tq = kni_fifo_free_count(kni->tx_q);
-
- /* Get the number of entries in alloc_q */
- num_aq = kni_fifo_count(kni->alloc_q);
-
- /* Get the number of free entries in free_q */
- num_fq = kni_fifo_free_count(kni->free_q);
-
- /* Calculate the number of entries to be dequeued from rx_q */
- num = min(num_rq, num_tq);
- num = min(num, num_aq);
- num = min(num, num_fq);
- num = min_t(uint32_t, num, MBUF_BURST_SZ);
-
- /* Return if no entry to dequeue from rx_q */
- if (num == 0)
- return;
-
- /* Burst dequeue from rx_q */
- ret = kni_fifo_get(kni->rx_q, kni->pa, num);
- if (ret == 0)
- return; /* Failing should not happen */
-
- /* Dequeue entries from alloc_q */
- ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
- if (ret) {
- num = ret;
- /* Copy mbufs */
- for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
- len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
- kni->va[i] = pa2va(kni->pa[i], kva);
-
- alloc_kva = pa2kva(kni->alloc_pa[i]);
- alloc_data_kva = kva2data_kva(alloc_kva);
- kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
-
- memcpy(alloc_data_kva, data_kva, len);
- alloc_kva->pkt_len = len;
- alloc_kva->data_len = len;
-
- kni->stats.tx_bytes += len;
- kni->stats.rx_bytes += len;
- }
-
- /* Burst enqueue mbufs into tx_q */
- ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
- if (ret != num)
- /* Failing should not happen */
- pr_err("Fail to enqueue mbufs into tx_q\n");
- }
-
- /* Burst enqueue mbufs into free_q */
- ret = kni_fifo_put(kni->free_q, kni->va, num);
- if (ret != num)
- /* Failing should not happen */
- pr_err("Fail to enqueue mbufs into free_q\n");
-
- /**
- * Update statistics; enqueue/dequeue failures cannot happen here,
- * as all queues were checked above.
- */
- kni->stats.tx_packets += num;
- kni->stats.rx_packets += num;
-}
-
-/*
- * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
- */
-static void
-kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
-{
- uint32_t ret;
- uint32_t len;
- uint32_t i, num_rq, num_fq, num;
- struct rte_kni_mbuf *kva;
- void *data_kva;
- struct sk_buff *skb;
- struct net_device *dev = kni->net_dev;
-
- /* Get the number of entries in rx_q */
- num_rq = kni_fifo_count(kni->rx_q);
-
- /* Get the number of free entries in free_q */
- num_fq = kni_fifo_free_count(kni->free_q);
-
- /* Calculate the number of entries to dequeue from rx_q */
- num = min(num_rq, num_fq);
- num = min_t(uint32_t, num, MBUF_BURST_SZ);
-
- /* Return if no entry to dequeue from rx_q */
- if (num == 0)
- return;
-
- /* Burst dequeue mbufs from rx_q */
- ret = kni_fifo_get(kni->rx_q, kni->pa, num);
- if (ret == 0)
- return;
-
- /* Copy mbufs to sk buffer and then call tx interface */
- for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
- len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
- kni->va[i] = pa2va(kni->pa[i], kva);
-
- skb = dev_alloc_skb(len + 2);
- if (skb) {
- /* Align IP on 16B boundary */
- skb_reserve(skb, 2);
- memcpy(skb_put(skb, len), data_kva, len);
- skb->dev = dev;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- dev_kfree_skb(skb);
- }
-
- /* Simulate real usage, allocate/copy skb twice */
- skb = dev_alloc_skb(len + 2);
- if (skb == NULL) {
- kni->stats.rx_dropped++;
- continue;
- }
-
- /* Align IP on 16B boundary */
- skb_reserve(skb, 2);
-
- if (kva->nb_segs == 1) {
- memcpy(skb_put(skb, len), data_kva, len);
- } else {
- int nb_segs;
- int kva_nb_segs = kva->nb_segs;
-
- for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
- memcpy(skb_put(skb, kva->data_len),
- data_kva, kva->data_len);
-
- if (!kva->next)
- break;
-
- kva = pa2kva(va2pa(kva->next, kva));
- data_kva = kva2data_kva(kva);
- }
- }
-
- skb->dev = dev;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- kni->stats.rx_bytes += len;
- kni->stats.rx_packets++;
-
- /* call tx interface */
- kni_net_tx(skb, dev);
- }
-
- /* enqueue all the mbufs from rx_q into free_q */
- ret = kni_fifo_put(kni->free_q, kni->va, num);
- if (ret != num)
- /* Failing should not happen */
- pr_err("Fail to enqueue mbufs into free_q\n");
-}
-
-/* rx interface */
-void
-kni_net_rx(struct kni_dev *kni)
-{
- /**
- * No need to check for a NULL pointer here,
- * as the function pointer has a default value.
- */
- (*kni_net_rx_func)(kni);
-}
-
-/*
- * Deal with a transmit timeout.
- */
-static void
-kni_net_tx_timeout(struct net_device *dev)
-{
- struct kni_dev *kni = netdev_priv(dev);
-
- pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
- jiffies - dev_trans_start(dev));
-
- kni->stats.tx_errors++;
- netif_wake_queue(dev);
-}
-
-/*
- * Ioctl commands
- */
-static int
-kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
- pr_debug("kni_net_ioctl group:%d cmd:%d\n",
- ((struct kni_dev *)netdev_priv(dev))->group_id, cmd);
-
- return 0;
-}
-
-static void
-kni_net_set_rx_mode(struct net_device *dev)
-{
-}
-
-static int
-kni_net_change_mtu(struct net_device *dev, int new_mtu)
-{
- int ret;
- struct rte_kni_request req;
- struct kni_dev *kni = netdev_priv(dev);
-
- pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
-
- memset(&req, 0, sizeof(req));
- req.req_id = RTE_KNI_REQ_CHANGE_MTU;
- req.new_mtu = new_mtu;
- ret = kni_net_process_request(kni, &req);
- if (ret == 0 && req.result == 0)
- dev->mtu = new_mtu;
-
- return (ret == 0) ? req.result : ret;
-}
-
-static void
-kni_net_set_promiscusity(struct net_device *netdev, int flags)
-{
- struct rte_kni_request req;
- struct kni_dev *kni = netdev_priv(netdev);
-
- memset(&req, 0, sizeof(req));
- req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;
-
- if (netdev->flags & IFF_PROMISC)
- req.promiscusity = 1;
- else
- req.promiscusity = 0;
- kni_net_process_request(kni, &req);
-}
-
-/*
- * Checks if the user space application provided the resp message
- */
-void
-kni_net_poll_resp(struct kni_dev *kni)
-{
- if (kni_fifo_count(kni->resp_q))
- wake_up_interruptible(&kni->wq);
-}
-
-/*
- * Return statistics to the caller
- */
-static struct net_device_stats *
-kni_net_stats(struct net_device *dev)
-{
- struct kni_dev *kni = netdev_priv(dev);
-
- return &kni->stats;
-}
-
-/*
- * Fill the eth header
- */
-static int
-kni_net_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *daddr,
- const void *saddr, uint32_t len)
-{
- struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-
- memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
- memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
- eth->h_proto = htons(type);
-
- return dev->hard_header_len;
-}
-
-/*
- * Re-fill the eth header
- */
-#ifdef HAVE_REBUILD_HEADER
-static int
-kni_net_rebuild_header(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct ethhdr *eth = (struct ethhdr *) skb->data;
-
- memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
- memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);
-
- return 0;
-}
-#endif /* < 4.1.0 */
-
-/**
- * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
- * @netdev: network interface device structure
- * @p: pointer to an address structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int
-kni_net_set_mac(struct net_device *netdev, void *p)
-{
- int ret;
- struct rte_kni_request req;
- struct kni_dev *kni;
- struct sockaddr *addr = p;
-
- memset(&req, 0, sizeof(req));
- req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;
-
- if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
- return -EADDRNOTAVAIL;
-
- memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
- memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
-
- kni = netdev_priv(netdev);
- ret = kni_net_process_request(kni, &req);
-
- return (ret == 0 ? req.result : ret);
-}
-
-#ifdef HAVE_CHANGE_CARRIER_CB
-static int
-kni_net_change_carrier(struct net_device *dev, bool new_carrier)
-{
- if (new_carrier)
- netif_carrier_on(dev);
- else
- netif_carrier_off(dev);
- return 0;
-}
-#endif
-
-static const struct header_ops kni_net_header_ops = {
- .create = kni_net_header,
-#ifdef HAVE_REBUILD_HEADER
- .rebuild = kni_net_rebuild_header,
-#endif /* < 4.1.0 */
- .cache = NULL, /* disable caching */
-};
-
-static const struct net_device_ops kni_net_netdev_ops = {
- .ndo_open = kni_net_open,
- .ndo_stop = kni_net_release,
- .ndo_set_config = kni_net_config,
- .ndo_change_rx_flags = kni_net_set_promiscusity,
- .ndo_start_xmit = kni_net_tx,
- .ndo_change_mtu = kni_net_change_mtu,
- .ndo_do_ioctl = kni_net_ioctl,
- .ndo_set_rx_mode = kni_net_set_rx_mode,
- .ndo_get_stats = kni_net_stats,
- .ndo_tx_timeout = kni_net_tx_timeout,
- .ndo_set_mac_address = kni_net_set_mac,
-#ifdef HAVE_CHANGE_CARRIER_CB
- .ndo_change_carrier = kni_net_change_carrier,
-#endif
-};
-
-void
-kni_net_init(struct net_device *dev)
-{
- struct kni_dev *kni = netdev_priv(dev);
-
- init_waitqueue_head(&kni->wq);
- mutex_init(&kni->sync_lock);
-
- ether_setup(dev); /* assign some of the fields */
- dev->netdev_ops = &kni_net_netdev_ops;
- dev->header_ops = &kni_net_header_ops;
- dev->watchdog_timeo = WD_TIMEOUT;
-}
-
-void
-kni_net_config_lo_mode(char *lo_str)
-{
- if (!lo_str) {
- pr_debug("loopback disabled");
- return;
- }
-
- if (!strcmp(lo_str, "lo_mode_none"))
- pr_debug("loopback disabled");
- else if (!strcmp(lo_str, "lo_mode_fifo")) {
- pr_debug("loopback mode=lo_mode_fifo enabled");
- kni_net_rx_func = kni_net_rx_lo_fifo;
- } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
- pr_debug("loopback mode=lo_mode_fifo_skb enabled");
- kni_net_rx_func = kni_net_rx_lo_fifo_skb;
- } else
- pr_debug("Incognizant parameter, loopback disabled");
-}
diff --git a/lib/librte_eal/meson.build b/lib/librte_eal/meson.build
index d9ba3853..e1fde15d 100644
--- a/lib/librte_eal/meson.build
+++ b/lib/librte_eal/meson.build
@@ -12,40 +12,19 @@ subdir('common') # defines common_sources, common_objs, etc.
if host_machine.system() == 'linux'
dpdk_conf.set('RTE_EXEC_ENV_LINUXAPP', 1)
subdir('linuxapp/eal')
- subdir('linuxapp/igb_uio')
elif host_machine.system() == 'freebsd'
dpdk_conf.set('RTE_EXEC_ENV_BSDAPP', 1)
subdir('bsdapp/eal')
- kmods = ['contigmem', 'nic_uio']
- # for building kernel modules, we use the kernel build system via make, as
- # with Linux. We have a skeleton BSDmakefile, which pulls many of its
- # values from the environment. Each module only has a single source file
- # right now, which allows us to simplify things. We pull in the source
- # files from the individual meson.build files, and then use a custom
- # target to call make, passing in the values as env parameters.
- kmod_cflags = ['-I' + meson.build_root(),
- '-I' + join_paths(meson.source_root(), 'config'),
- '-include rte_config.h']
- foreach k:kmods
- subdir(join_paths('bsdapp', k))
- custom_target(k,
- input: [files('bsdapp/BSDmakefile.meson'), sources],
- output: k + '.ko',
- command: ['make', '-f', '@INPUT0@',
- 'KMOD_SRC=@INPUT1@',
- 'KMOD=' + k,
- 'KMOD_CFLAGS=' + ' '.join(kmod_cflags)],
- build_by_default: get_option('enable_kmods'))
- endforeach
else
- error('unsupported system type @0@'.format(hostmachine.system()))
+ error('unsupported system type "@0@"'.format(host_machine.system()))
endif
-version = 6 # the version of the EAL API
+version = 8 # the version of the EAL API
allow_experimental_apis = true
deps += 'compat'
+deps += 'kvargs'
cflags += '-D_GNU_SOURCE'
sources = common_sources + env_sources
objs = common_objs + env_objs
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d1236023..344a43d3 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -2,7 +2,6 @@ DPDK_2.0 {
global:
__rte_panic;
- devargs_list;
eal_parse_sysfs_value;
eal_timer_source;
lcore_config;
@@ -25,7 +24,6 @@ DPDK_2.0 {
rte_eal_devargs_type_count;
rte_eal_get_configuration;
rte_eal_get_lcore_state;
- rte_eal_get_physmem_layout;
rte_eal_get_physmem_size;
rte_eal_has_hugepages;
rte_eal_hpet_init;
@@ -163,9 +161,6 @@ DPDK_17.05 {
rte_log_set_global_level;
rte_log_set_level;
rte_log_set_level_regexp;
- vfio_get_container_fd;
- vfio_get_group_fd;
- vfio_get_group_no;
} DPDK_17.02;
@@ -186,7 +181,6 @@ DPDK_17.11 {
rte_bus_get_iommu_class;
rte_eal_has_pci;
rte_eal_iova_mode;
- rte_eal_mbuf_default_mempool_ops;
rte_eal_using_phys_addrs;
rte_eal_vfio_intr_mode;
rte_lcore_has_role;
@@ -211,26 +205,15 @@ DPDK_18.02 {
} DPDK_17.11;
-EXPERIMENTAL {
+DPDK_18.05 {
global:
- rte_eal_cleanup;
- rte_eal_devargs_insert;
- rte_eal_devargs_parse;
- rte_eal_devargs_remove;
- rte_eal_hotplug_add;
- rte_eal_hotplug_remove;
- rte_eal_mbuf_user_pool_ops;
- rte_mp_action_register;
- rte_mp_action_unregister;
- rte_mp_sendmsg;
- rte_mp_request;
- rte_mp_reply;
+ rte_log_set_level_pattern;
rte_service_attr_get;
rte_service_attr_reset_all;
rte_service_component_register;
- rte_service_component_unregister;
rte_service_component_runstate_set;
+ rte_service_component_unregister;
rte_service_dump;
rte_service_finalize;
rte_service_get_by_id;
@@ -256,3 +239,100 @@ EXPERIMENTAL {
rte_service_start_with_defaults;
} DPDK_18.02;
+
+DPDK_18.08 {
+ global:
+
+ rte_eal_mbuf_user_pool_ops;
+ rte_uuid_compare;
+ rte_uuid_is_null;
+ rte_uuid_parse;
+ rte_uuid_unparse;
+ rte_vfio_container_create;
+ rte_vfio_container_destroy;
+ rte_vfio_container_dma_map;
+ rte_vfio_container_dma_unmap;
+ rte_vfio_container_group_bind;
+ rte_vfio_container_group_unbind;
+ rte_vfio_dma_map;
+ rte_vfio_dma_unmap;
+ rte_vfio_get_container_fd;
+ rte_vfio_get_group_fd;
+ rte_vfio_get_group_num;
+
+} DPDK_18.05;
+
+EXPERIMENTAL {
+ global:
+
+ rte_class_find;
+ rte_class_find_by_name;
+ rte_class_register;
+ rte_class_unregister;
+ rte_ctrl_thread_create;
+ rte_dev_event_callback_register;
+ rte_dev_event_callback_unregister;
+ rte_dev_event_monitor_start;
+ rte_dev_event_monitor_stop;
+ rte_dev_iterator_init;
+ rte_dev_iterator_next;
+ rte_devargs_add;
+ rte_devargs_dump;
+ rte_devargs_insert;
+ rte_devargs_next;
+ rte_devargs_parse;
+ rte_devargs_parsef;
+ rte_devargs_remove;
+ rte_devargs_type_count;
+ rte_eal_cleanup;
+ rte_eal_hotplug_add;
+ rte_eal_hotplug_remove;
+ rte_fbarray_attach;
+ rte_fbarray_destroy;
+ rte_fbarray_detach;
+ rte_fbarray_dump_metadata;
+ rte_fbarray_find_idx;
+ rte_fbarray_find_next_free;
+ rte_fbarray_find_next_used;
+ rte_fbarray_find_next_n_free;
+ rte_fbarray_find_next_n_used;
+ rte_fbarray_find_prev_free;
+ rte_fbarray_find_prev_used;
+ rte_fbarray_find_prev_n_free;
+ rte_fbarray_find_prev_n_used;
+ rte_fbarray_find_contig_free;
+ rte_fbarray_find_contig_used;
+ rte_fbarray_find_rev_contig_free;
+ rte_fbarray_find_rev_contig_used;
+ rte_fbarray_get;
+ rte_fbarray_init;
+ rte_fbarray_is_used;
+ rte_fbarray_set_free;
+ rte_fbarray_set_used;
+ rte_log_register_type_and_pick_level;
+ rte_malloc_dump_heaps;
+ rte_mem_alloc_validator_register;
+ rte_mem_alloc_validator_unregister;
+ rte_mem_event_callback_register;
+ rte_mem_event_callback_unregister;
+ rte_mem_iova2virt;
+ rte_mem_virt2memseg;
+ rte_mem_virt2memseg_list;
+ rte_memseg_contig_walk;
+ rte_memseg_contig_walk_thread_unsafe;
+ rte_memseg_list_walk;
+ rte_memseg_list_walk_thread_unsafe;
+ rte_memseg_walk;
+ rte_memseg_walk_thread_unsafe;
+ rte_mp_action_register;
+ rte_mp_action_unregister;
+ rte_mp_reply;
+ rte_mp_request_sync;
+ rte_mp_request_async;
+ rte_mp_sendmsg;
+ rte_service_lcore_attr_get;
+ rte_service_lcore_attr_reset_all;
+ rte_service_may_be_active;
+ rte_socket_count;
+ rte_socket_id_by_idx;
+};
diff --git a/lib/librte_ether/Makefile b/lib/librte_ethdev/Makefile
index 3ca5782b..0935a275 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -16,7 +16,7 @@ LDLIBS += -lrte_mbuf
EXPORT_MAP := rte_ethdev_version.map
-LIBABIVER := 8
+LIBABIVER := 10
SRCS-y += rte_ethdev.c
SRCS-y += rte_flow.c
diff --git a/lib/librte_ether/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c
index 0d1dcda3..0d1dcda3 100644
--- a/lib/librte_ether/ethdev_profile.c
+++ b/lib/librte_ethdev/ethdev_profile.c
diff --git a/lib/librte_ether/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h
index e5ea3682..e5ea3682 100644
--- a/lib/librte_ether/ethdev_profile.h
+++ b/lib/librte_ethdev/ethdev_profile.h
diff --git a/lib/librte_ether/meson.build b/lib/librte_ethdev/meson.build
index 7fed8605..596cd0f3 100644
--- a/lib/librte_ether/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -2,7 +2,7 @@
# Copyright(c) 2017 Intel Corporation
name = 'ethdev'
-version = 8
+version = 10
allow_experimental_apis = true
sources = files('ethdev_profile.c',
'rte_ethdev.c',
@@ -24,4 +24,4 @@ headers = files('rte_ethdev.h',
'rte_tm.h',
'rte_tm_driver.h')
-deps += ['net']
+deps += ['net', 'kvargs']
diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ethdev/rte_dev_info.h
index 6b68584d..fea5da88 100644
--- a/lib/librte_ether/rte_dev_info.h
+++ b/lib/librte_ethdev/rte_dev_info.h
@@ -28,4 +28,22 @@ struct rte_dev_eeprom_info {
uint32_t magic; /**< Device-specific key, such as device-id */
};
+/**
+ * Placeholder for accessing plugin module eeprom
+ */
+struct rte_eth_dev_module_info {
+ uint32_t type; /**< Type of plugin module eeprom */
+ uint32_t eeprom_len; /**< Length of plugin module eeprom */
+};
+
+/* EEPROM Standards for plug in modules */
+#define RTE_ETH_MODULE_SFF_8079 0x1
+#define RTE_ETH_MODULE_SFF_8079_LEN 256
+#define RTE_ETH_MODULE_SFF_8472 0x2
+#define RTE_ETH_MODULE_SFF_8472_LEN 512
+#define RTE_ETH_MODULE_SFF_8636 0x3
+#define RTE_ETH_MODULE_SFF_8636_LEN 256
+#define RTE_ETH_MODULE_SFF_8436 0x4
+#define RTE_ETH_MODULE_SFF_8436_LEN 256
+
#endif /* _RTE_DEV_INFO_H_ */
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ethdev/rte_eth_ctrl.h
index 668f59ac..5ea8ae24 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ethdev/rte_eth_ctrl.h
@@ -54,7 +54,8 @@ extern "C" {
#define RTE_ETH_FLOW_VXLAN 19 /**< VXLAN protocol based flow */
#define RTE_ETH_FLOW_GENEVE 20 /**< GENEVE protocol based flow */
#define RTE_ETH_FLOW_NVGRE 21 /**< NVGRE protocol based flow */
-#define RTE_ETH_FLOW_MAX 22
+#define RTE_ETH_FLOW_VXLAN_GPE 22 /**< VXLAN-GPE protocol based flow */
+#define RTE_ETH_FLOW_MAX 23
/**
* Feature filter types
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 0590f0c1..4c320250 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -10,6 +10,7 @@
#include <string.h>
#include <stdarg.h>
#include <errno.h>
+#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <netinet/in.h>
@@ -34,16 +35,18 @@
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
-#include <rte_compat.h>
+#include <rte_kvargs.h>
#include "rte_ether.h"
#include "rte_ethdev.h"
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
+int rte_eth_dev_logtype;
+
static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
-static uint8_t eth_dev_last_created_port;
+static uint16_t eth_dev_last_created_port;
/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -123,6 +126,7 @@ static const struct {
RTE_RX_OFFLOAD_BIT2STR(SCATTER),
RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
RTE_RX_OFFLOAD_BIT2STR(SECURITY),
+ RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC),
};
#undef RTE_RX_OFFLOAD_BIT2STR
@@ -222,19 +226,41 @@ rte_eth_dev_shared_data_prepare(void)
rte_spinlock_unlock(&rte_eth_shared_data_lock);
}
-struct rte_eth_dev *
-rte_eth_dev_allocated(const char *name)
+static bool
+is_allocated(const struct rte_eth_dev *ethdev)
+{
+ return ethdev->data->name[0] != '\0';
+}
+
+static struct rte_eth_dev *
+_rte_eth_dev_allocated(const char *name)
{
unsigned i;
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
- if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) &&
+ if (rte_eth_devices[i].data != NULL &&
strcmp(rte_eth_devices[i].data->name, name) == 0)
return &rte_eth_devices[i];
}
return NULL;
}
+struct rte_eth_dev *
+rte_eth_dev_allocated(const char *name)
+{
+ struct rte_eth_dev *ethdev;
+
+ rte_eth_dev_shared_data_prepare();
+
+ rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
+
+ ethdev = _rte_eth_dev_allocated(name);
+
+ rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
+
+ return ethdev;
+}
+
static uint16_t
rte_eth_dev_find_free_port(void)
{
@@ -257,7 +283,6 @@ eth_dev_get(uint16_t port_id)
struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id];
eth_dev->data = &rte_eth_dev_shared_data->data[port_id];
- eth_dev->state = RTE_ETH_DEV_ATTACHED;
eth_dev_last_created_port = port_id;
@@ -275,15 +300,17 @@ rte_eth_dev_allocate(const char *name)
/* Synchronize port creation between primary and secondary threads. */
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
- port_id = rte_eth_dev_find_free_port();
- if (port_id == RTE_MAX_ETHPORTS) {
- RTE_LOG(ERR, EAL, "Reached maximum number of Ethernet ports\n");
+ if (_rte_eth_dev_allocated(name) != NULL) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethernet device with name %s already allocated\n",
+ name);
goto unlock;
}
- if (rte_eth_dev_allocated(name) != NULL) {
- RTE_LOG(ERR, EAL, "Ethernet Device with name %s already allocated!\n",
- name);
+ port_id = rte_eth_dev_find_free_port();
+ if (port_id == RTE_MAX_ETHPORTS) {
+ RTE_ETHDEV_LOG(ERR,
+ "Reached maximum number of Ethernet ports\n");
goto unlock;
}
@@ -295,9 +322,6 @@ rte_eth_dev_allocate(const char *name)
unlock:
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
- if (eth_dev != NULL)
- _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_NEW, NULL);
-
return eth_dev;
}
@@ -322,8 +346,8 @@ rte_eth_dev_attach_secondary(const char *name)
break;
}
if (i == RTE_MAX_ETHPORTS) {
- RTE_PMD_DEBUG_TRACE(
- "device %s is not driven by the primary process\n",
+ RTE_ETHDEV_LOG(ERR,
+ "Device %s is not driven by the primary process\n",
name);
} else {
eth_dev = eth_dev_get(i);
@@ -342,6 +366,8 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
rte_eth_dev_shared_data_prepare();
+ _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
eth_dev->state = RTE_ETH_DEV_UNUSED;
@@ -350,8 +376,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
- _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
-
return 0;
}
@@ -370,13 +394,14 @@ rte_eth_is_valid_owner_id(uint64_t owner_id)
{
if (owner_id == RTE_ETH_DEV_NO_OWNER ||
rte_eth_dev_shared_data->next_owner_id <= owner_id) {
- RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016lX.\n", owner_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n",
+ owner_id);
return 0;
}
return 1;
}
-uint64_t __rte_experimental
+uint64_t
rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id)
{
while (port_id < RTE_MAX_ETHPORTS &&
@@ -408,10 +433,15 @@ static int
_rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
const struct rte_eth_dev_owner *new_owner)
{
+ struct rte_eth_dev *ethdev = &rte_eth_devices[port_id];
struct rte_eth_dev_owner *port_owner;
int sret;
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+ if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
+ return -ENODEV;
+ }
if (!rte_eth_is_valid_owner_id(new_owner->id) &&
!rte_eth_is_valid_owner_id(old_owner_id))
@@ -419,22 +449,22 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
port_owner = &rte_eth_devices[port_id].data->owner;
if (port_owner->id != old_owner_id) {
- RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned"
- " by %s_%016lX.\n", port_id,
- port_owner->name, port_owner->id);
+ RTE_ETHDEV_LOG(ERR,
+ "Cannot set owner to port %u already owned by %s_%016"PRIX64"\n",
+ port_id, port_owner->name, port_owner->id);
return -EPERM;
}
sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s",
new_owner->name);
if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN)
- RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u owner name was truncated\n",
+ port_id);
port_owner->id = new_owner->id;
- RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016lX.\n", port_id,
- new_owner->name, new_owner->id);
+ RTE_ETHDEV_LOG(DEBUG, "Port %u owner is %s_%016"PRIx64"\n",
+ port_id, new_owner->name, new_owner->id);
return 0;
}
@@ -482,11 +512,13 @@ rte_eth_dev_owner_delete(const uint64_t owner_id)
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
if (rte_eth_is_valid_owner_id(owner_id)) {
- RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, owner_id)
- memset(&rte_eth_devices[port_id].data->owner, 0,
- sizeof(struct rte_eth_dev_owner));
- RTE_PMD_DEBUG_TRACE("All port owners owned by %016X identifier"
- " have removed.\n", owner_id);
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++)
+ if (rte_eth_devices[port_id].data->owner.id == owner_id)
+ memset(&rte_eth_devices[port_id].data->owner, 0,
+ sizeof(struct rte_eth_dev_owner));
+ RTE_ETHDEV_LOG(ERR,
+ "All port owners owned by %016"PRIx64" identifier have removed\n",
+ owner_id);
}
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
@@ -496,17 +528,18 @@ int __rte_experimental
rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner)
{
int ret = 0;
+ struct rte_eth_dev *ethdev = &rte_eth_devices[port_id];
rte_eth_dev_shared_data_prepare();
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
- if (!rte_eth_dev_is_valid_port(port_id)) {
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+ if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
ret = -ENODEV;
} else {
- rte_memcpy(owner, &rte_eth_devices[port_id].data->owner,
- sizeof(*owner));
+ rte_memcpy(owner, &ethdev->data->owner, sizeof(*owner));
}
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
@@ -521,7 +554,7 @@ rte_eth_dev_socket_id(uint16_t port_id)
}
void *
-rte_eth_dev_get_sec_ctx(uint8_t port_id)
+rte_eth_dev_get_sec_ctx(uint16_t port_id)
{
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
return rte_eth_devices[port_id].security_ctx;
@@ -530,6 +563,12 @@ rte_eth_dev_get_sec_ctx(uint8_t port_id)
uint16_t
rte_eth_dev_count(void)
{
+ return rte_eth_dev_count_avail();
+}
+
+uint16_t
+rte_eth_dev_count_avail(void)
+{
uint16_t p;
uint16_t count;
@@ -541,6 +580,18 @@ rte_eth_dev_count(void)
return count;
}
+uint16_t __rte_experimental
+rte_eth_dev_count_total(void)
+{
+ uint16_t port, count = 0;
+
+ for (port = 0; port < RTE_MAX_ETHPORTS; port++)
+ if (rte_eth_devices[port].state != RTE_ETH_DEV_UNUSED)
+ count++;
+
+ return count;
+}
+
int
rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
{
@@ -549,7 +600,7 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
@@ -566,14 +617,13 @@ rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
uint32_t pid;
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
if (rte_eth_devices[pid].state != RTE_ETH_DEV_UNUSED &&
- !strncmp(name, rte_eth_dev_shared_data->data[pid].name,
- strlen(name))) {
+ !strcmp(name, rte_eth_dev_shared_data->data[pid].name)) {
*port_id = pid;
return 0;
}
@@ -596,35 +646,37 @@ eth_err(uint16_t port_id, int ret)
int
rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
{
+ int current = rte_eth_dev_count_total();
+ struct rte_devargs da;
int ret = -1;
- int current = rte_eth_dev_count();
- char *name = NULL;
- char *args = NULL;
+
+ memset(&da, 0, sizeof(da));
if ((devargs == NULL) || (port_id == NULL)) {
ret = -EINVAL;
goto err;
}
- /* parse devargs, then retrieve device name and args */
- if (rte_eal_parse_devargs_str(devargs, &name, &args))
+ /* parse devargs */
+ if (rte_devargs_parse(&da, devargs))
goto err;
- ret = rte_eal_dev_attach(name, args);
+ ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args);
if (ret < 0)
goto err;
/* no point looking at the port count if no port exists */
- if (!rte_eth_dev_count()) {
- RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name);
+ if (!rte_eth_dev_count_total()) {
+ RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name);
ret = -1;
goto err;
}
/* if nothing happened, there is a bug here, since some driver told us
* it did attach a device, but did not create a port.
+ * FIXME: race condition in case of plug-out of another device
*/
- if (current == rte_eth_dev_count()) {
+ if (current == rte_eth_dev_count_total()) {
ret = -1;
goto err;
}
@@ -633,45 +685,42 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
ret = 0;
err:
- free(name);
- free(args);
+ free(da.args);
return ret;
}
/* detach the device, then store the name of the device */
int
-rte_eth_dev_detach(uint16_t port_id, char *name)
+rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
{
+ struct rte_device *dev;
+ struct rte_bus *bus;
uint32_t dev_flags;
int ret = -1;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
- if (name == NULL) {
- ret = -EINVAL;
- goto err;
- }
-
dev_flags = rte_eth_devices[port_id].data->dev_flags;
if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
- RTE_LOG(ERR, EAL, "Port %" PRIu16 " is bonded, cannot detach\n",
- port_id);
- ret = -ENOTSUP;
- goto err;
+ RTE_ETHDEV_LOG(ERR,
+ "Port %"PRIu16" is bonded, cannot detach\n", port_id);
+ return -ENOTSUP;
}
- snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
- "%s", rte_eth_devices[port_id].data->name);
+ dev = rte_eth_devices[port_id].device;
+ if (dev == NULL)
+ return -EINVAL;
+
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL)
+ return -ENOENT;
- ret = rte_eal_dev_detach(rte_eth_devices[port_id].device);
+ ret = rte_eal_hotplug_remove(bus->name, dev->name);
if (ret < 0)
- goto err;
+ return ret;
rte_eth_dev_release_port(&rte_eth_devices[port_id]);
return 0;
-
-err:
- return ret;
}
static int
@@ -732,16 +781,23 @@ rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
+ if (!dev->data->dev_started) {
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
+ return -EINVAL;
+ }
+
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
rx_queue_id, port_id);
return 0;
}
@@ -760,15 +816,15 @@ rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
rx_queue_id, port_id);
return 0;
}
@@ -785,23 +841,28 @@ rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
dev = &rte_eth_devices[port_id];
+ if (!dev->data->dev_started) {
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
+ return -EINVAL;
+ }
+
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
tx_queue_id, port_id);
return 0;
}
- return eth_err(port_id, dev->dev_ops->tx_queue_start(dev,
- tx_queue_id));
-
+ return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id));
}
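
Both queue-start paths now refuse to run while the port itself is stopped. For queues configured with deferred start the expected order is therefore queue setup, rte_eth_dev_start(), then the per-queue start call, as in this small sketch (port and queue ids are illustrative):

#include <rte_ethdev.h>

/* Sketch: start a Tx queue that was set up with tx_deferred_start = 1. */
static int
start_deferred_txq(uint16_t port_id, uint16_t queue_id)
{
	int ret;

	/* The port must be started first, otherwise -EINVAL is returned. */
	ret = rte_eth_dev_start(port_id);
	if (ret < 0)
		return ret;
	return rte_eth_dev_tx_queue_start(port_id, queue_id);
}
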
int
@@ -813,15 +874,15 @@ rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
tx_queue_id, port_id);
return 0;
}
@@ -913,95 +974,6 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex)
}
}
-/**
- * A conversion function from rxmode bitfield API.
- */
-static void
-rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode,
- uint64_t *rx_offloads)
-{
- uint64_t offloads = 0;
-
- if (rxmode->header_split == 1)
- offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT;
- if (rxmode->hw_ip_checksum == 1)
- offloads |= DEV_RX_OFFLOAD_CHECKSUM;
- if (rxmode->hw_vlan_filter == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
- if (rxmode->hw_vlan_strip == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- if (rxmode->hw_vlan_extend == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
- if (rxmode->jumbo_frame == 1)
- offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- if (rxmode->hw_strip_crc == 1)
- offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
- if (rxmode->enable_scatter == 1)
- offloads |= DEV_RX_OFFLOAD_SCATTER;
- if (rxmode->enable_lro == 1)
- offloads |= DEV_RX_OFFLOAD_TCP_LRO;
- if (rxmode->hw_timestamp == 1)
- offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
- if (rxmode->security == 1)
- offloads |= DEV_RX_OFFLOAD_SECURITY;
-
- *rx_offloads = offloads;
-}
-
-/**
- * A conversion function from rxmode offloads API.
- */
-static void
-rte_eth_convert_rx_offloads(const uint64_t rx_offloads,
- struct rte_eth_rxmode *rxmode)
-{
-
- if (rx_offloads & DEV_RX_OFFLOAD_HEADER_SPLIT)
- rxmode->header_split = 1;
- else
- rxmode->header_split = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_CHECKSUM)
- rxmode->hw_ip_checksum = 1;
- else
- rxmode->hw_ip_checksum = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
- rxmode->hw_vlan_filter = 1;
- else
- rxmode->hw_vlan_filter = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
- rxmode->hw_vlan_strip = 1;
- else
- rxmode->hw_vlan_strip = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
- rxmode->hw_vlan_extend = 1;
- else
- rxmode->hw_vlan_extend = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
- rxmode->jumbo_frame = 1;
- else
- rxmode->jumbo_frame = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
- rxmode->hw_strip_crc = 1;
- else
- rxmode->hw_strip_crc = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- rxmode->enable_scatter = 1;
- else
- rxmode->enable_scatter = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
- rxmode->enable_lro = 1;
- else
- rxmode->enable_lro = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- rxmode->hw_timestamp = 1;
- else
- rxmode->hw_timestamp = 0;
- if (rx_offloads & DEV_RX_OFFLOAD_SECURITY)
- rxmode->security = 1;
- else
- rxmode->security = 0;
-}
-
const char * __rte_experimental
rte_eth_dev_rx_offload_name(uint64_t offload)
{
@@ -1045,43 +1017,49 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+ dev = &rte_eth_devices[port_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+
+ /* If number of queues specified by application for both Rx and Tx is
+ * zero, use driver preferred values. This cannot be done individually
+ * as it is valid for either Tx or Rx (but not both) to be zero.
+	 * If driver does not provide any preferred values, fall back on
+ * EAL defaults.
+ */
+ if (nb_rx_q == 0 && nb_tx_q == 0) {
+ nb_rx_q = dev_info.default_rxportconf.nb_queues;
+ if (nb_rx_q == 0)
+ nb_rx_q = RTE_ETH_DEV_FALLBACK_RX_NBQUEUES;
+ nb_tx_q = dev_info.default_txportconf.nb_queues;
+ if (nb_tx_q == 0)
+ nb_tx_q = RTE_ETH_DEV_FALLBACK_TX_NBQUEUES;
+ }
+
if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of RX queues requested (%u) is greater than max supported(%d)\n",
nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of TX queues requested (%u) is greater than max supported(%d)\n",
nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
-
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow configuration\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be stopped to allow configuration\n",
+ port_id);
return -EBUSY;
}
- /*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
- */
- if (dev_conf->rxmode.ignore_offload_bitfield == 0) {
- rte_eth_convert_rx_offload_bitfield(
- &dev_conf->rxmode, &local_conf.rxmode.offloads);
- } else {
- rte_eth_convert_rx_offloads(dev_conf->rxmode.offloads,
- &local_conf.rxmode);
- }
-
/* Copy the dev_conf parameter into the dev structure */
memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf));
@@ -1090,36 +1068,29 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* than the maximum number of RX and TX queues supported by the
* configured device.
*/
- (*dev->dev_ops->dev_infos_get)(dev, &dev_info);
-
- if (nb_rx_q == 0 && nb_tx_q == 0) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0\n", port_id);
- return -EINVAL;
- }
-
if (nb_rx_q > dev_info.max_rx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n",
- port_id, nb_rx_q, dev_info.max_rx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n",
+ port_id, nb_rx_q, dev_info.max_rx_queues);
return -EINVAL;
}
if (nb_tx_q > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n",
- port_id, nb_tx_q, dev_info.max_tx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n",
+ port_id, nb_tx_q, dev_info.max_tx_queues);
return -EINVAL;
}
/* Check that the device supports requested interrupts */
if ((dev_conf->intr_conf.lsc == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
- dev->device->driver->name);
- return -EINVAL;
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n",
+ dev->device->driver->name);
+ return -EINVAL;
}
if ((dev_conf->intr_conf.rmv == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n",
- dev->device->driver->name);
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n",
+ dev->device->driver->name);
return -EINVAL;
}
@@ -1128,19 +1099,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* length is supported by the configured device.
*/
if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
- if (dev_conf->rxmode.max_rx_pkt_len >
- dev_info.max_rx_pktlen) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " > max valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
- (unsigned)dev_info.max_rx_pktlen);
+ if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
+ dev_info.max_rx_pktlen);
return -EINVAL;
} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " < min valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
(unsigned)ETHER_MIN_LEN);
return -EINVAL;
}
@@ -1152,28 +1120,71 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
ETHER_MAX_LEN;
}
+ /* Any requested offloading must be within its device capabilities */
+ if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) !=
+ local_conf.rxmode.offloads) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.rxmode.offloads,
+ dev_info.rx_offload_capa,
+ __func__);
+ return -EINVAL;
+ }
+ if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) !=
+ local_conf.txmode.offloads) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.txmode.offloads,
+ dev_info.tx_offload_capa,
+ __func__);
+ return -EINVAL;
+ }
+
+ if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) &&
+ (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
+ RTE_ETHDEV_LOG(ERR,
+ "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n",
+ port_id);
+ return -EINVAL;
+ }
+
+ /* Check that device supports requested rss hash functions. */
+ if ((dev_info.flow_type_rss_offloads |
+ dev_conf->rx_adv_conf.rss_conf.rss_hf) !=
+ dev_info.flow_type_rss_offloads) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
+ }
+
/*
* Setup new number of RX/TX queues and reconfigure device.
*/
diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_rx_queue_config = %d\n",
+ port_id, diag);
return diag;
}
diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_tx_queue_config = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
return diag;
}
diag = (*dev->dev_ops->dev_configure)(dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u dev_configure = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
@@ -1182,8 +1193,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
/* Initialize Rx profiling if enabled at compilation time. */
diag = __rte_eth_profile_rx_init(port_id, dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
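
rte_eth_dev_configure() now accepts zero for both queue counts to pick up driver-preferred defaults and rejects requested offloads or RSS hash types that fall outside the capabilities reported by the driver. An illustrative application-side sketch that stays within those checks (the offload and RSS choices are only examples):

#include <rte_ethdev.h>

/* Sketch: configure a port with driver-preferred queue counts and only
 * offloads/RSS hash types the device reports as supported. */
static int
configure_port(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf conf = { 0 };

	rte_eth_dev_info_get(port_id, &dev_info);

	if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CHECKSUM)
		conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;

	conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
	conf.rx_adv_conf.rss_conf.rss_hf =
		ETH_RSS_IP & dev_info.flow_type_rss_offloads;

	/* nb_rx_q == nb_tx_q == 0: use default_rxportconf/default_txportconf
	 * from the driver, or the EAL fallbacks if those are zero too. */
	return rte_eth_dev_configure(port_id, 0, 0, &conf);
}
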
@@ -1196,8 +1207,7 @@ void
_rte_eth_dev_reset(struct rte_eth_dev *dev)
{
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow reset\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u must be stopped to allow reset\n",
dev->data->port_id);
return;
}
@@ -1276,8 +1286,8 @@ rte_eth_dev_start(uint16_t port_id)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
if (dev->data->dev_started != 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already started\n",
port_id);
return 0;
}
@@ -1308,8 +1318,8 @@ rte_eth_dev_stop(uint16_t port_id)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
if (dev->data->dev_started == 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already stopped\n",
port_id);
return;
}
@@ -1421,16 +1431,10 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
- if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow configuration\n", port_id);
- return -EBUSY;
- }
-
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);
@@ -1441,39 +1445,53 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
*/
rte_eth_dev_info_get(port_id, &dev_info);
if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
- RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
- mp->name, (int) mp->private_data_size,
- (int) sizeof(struct rte_pktmbuf_pool_private));
+ RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n",
+ mp->name, (int)mp->private_data_size,
+ (int)sizeof(struct rte_pktmbuf_pool_private));
return -ENOSPC;
}
mbp_buf_size = rte_pktmbuf_data_room_size(mp);
if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
- RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
- "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
- "=%d)\n",
- mp->name,
- (int)mbp_buf_size,
- (int)(RTE_PKTMBUF_HEADROOM +
- dev_info.min_rx_bufsize),
- (int)RTE_PKTMBUF_HEADROOM,
- (int)dev_info.min_rx_bufsize);
+ RTE_ETHDEV_LOG(ERR,
+ "%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n",
+ mp->name, (int)mbp_buf_size,
+ (int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize),
+ (int)RTE_PKTMBUF_HEADROOM,
+ (int)dev_info.min_rx_bufsize);
return -EINVAL;
}
+ /* Use default specified by driver, if nb_rx_desc is zero */
+ if (nb_rx_desc == 0) {
+ nb_rx_desc = dev_info.default_rxportconf.ring_size;
+ /* If driver default is also zero, fall back on EAL default */
+ if (nb_rx_desc == 0)
+ nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
+ }
+
if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_rx_desc,
- dev_info.rx_desc_lim.nb_max,
+ RTE_ETHDEV_LOG(ERR,
+			"Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
+ nb_rx_desc, dev_info.rx_desc_lim.nb_max,
dev_info.rx_desc_lim.nb_min,
dev_info.rx_desc_lim.nb_align);
return -EINVAL;
}
+ if (dev->data->dev_started &&
+ !(dev_info.dev_capa &
+ RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
+ return -EBUSY;
+
+ if (dev->data->dev_started &&
+ (dev->data->rx_queue_state[rx_queue_id] !=
+ RTE_ETH_QUEUE_STATE_STOPPED))
+ return -EBUSY;
+
rxq = dev->data->rx_queues;
if (rxq[rx_queue_id]) {
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
@@ -1486,13 +1504,34 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
rx_conf = &dev_info.default_rxconf;
local_conf = *rx_conf;
- if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) {
- /**
- * Reflect port offloads to queue offloads in order for
- * offloads to not be discarded.
- */
- rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode,
- &local_conf.offloads);
+
+ /*
+	 * If an offload has already been enabled in
+	 * rte_eth_dev_configure(), it has been enabled on all queues,
+	 * so there is no need to enable it on this queue again.
+	 * The local_conf.offloads input to the underlying PMD only carries
+	 * those offloads which are enabled on this queue only and
+	 * not on all queues.
+ */
+ local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads;
+
+ /*
+	 * Offloads newly added for this queue are those not enabled in
+	 * rte_eth_dev_configure() and they must be per-queue offloads.
+	 * A pure per-port offload can't be enabled on a queue while
+	 * disabled on another queue, and it can't be newly enabled on a
+	 * queue if it hasn't been enabled in rte_eth_dev_configure().
+ */
+ if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
+ local_conf.offloads) {
+ RTE_ETHDEV_LOG(ERR,
+			"Ethdev port_id=%d rx_queue_id=%d, newly added offloads 0x%"PRIx64" must be "
+			"within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, rx_queue_id, local_conf.offloads,
+ dev_info.rx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
@@ -1506,55 +1545,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
return eth_err(port_id, ret);
}
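
With the bitfield conversion removed, rte_eth_rx_queue_setup() treats rx_conf->offloads as queue-local additions only: anything already enabled port-wide is masked out, and whatever remains must fit within rx_queue_offload_capa. Passing nb_rx_desc == 0 now selects the driver's preferred ring size. An illustrative sketch (the scatter offload choice and helper name are assumptions for the example):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch: set up one Rx queue, leaning on driver defaults where possible. */
static int
setup_rxq(uint16_t port_id, uint16_t queue_id, struct rte_mempool *mp)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rxconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	rxconf = dev_info.default_rxconf;

	/* Only request per-queue additions the device can honour. */
	rxconf.offloads = DEV_RX_OFFLOAD_SCATTER &
			  dev_info.rx_queue_offload_capa;

	/* nb_rx_desc == 0: use default_rxportconf.ring_size, or the EAL
	 * fallback if the driver does not report one. */
	return rte_eth_rx_queue_setup(port_id, queue_id, 0,
			rte_eth_dev_socket_id(port_id), &rxconf, mp);
}
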
-/**
- * A conversion function from txq_flags API.
- */
-static void
-rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads)
-{
- uint64_t offloads = 0;
-
- if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL))
- offloads |= DEV_TX_OFFLOAD_VLAN_INSERT;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP))
- offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP))
- offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP))
- offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) &&
- (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP))
- offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
-
- *tx_offloads = offloads;
-}
-
-/**
- * A conversion function from offloads API.
- */
-static void
-rte_eth_convert_txq_offloads(const uint64_t tx_offloads, uint32_t *txq_flags)
-{
- uint32_t flags = 0;
-
- if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS))
- flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
- if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT))
- flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
- if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
- if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
- flags |= (ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP);
-
- *txq_flags = flags;
-}
-
int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
@@ -1569,33 +1559,43 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
- if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow configuration\n", port_id);
- return -EBUSY;
- }
-
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);
rte_eth_dev_info_get(port_id, &dev_info);
+ /* Use default specified by driver, if nb_tx_desc is zero */
+ if (nb_tx_desc == 0) {
+ nb_tx_desc = dev_info.default_txportconf.ring_size;
+ /* If driver default is zero, fall back on EAL default */
+ if (nb_tx_desc == 0)
+ nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE;
+ }
if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_tx_desc,
- dev_info.tx_desc_lim.nb_max,
- dev_info.tx_desc_lim.nb_min,
- dev_info.tx_desc_lim.nb_align);
+ RTE_ETHDEV_LOG(ERR,
+			"Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
+ nb_tx_desc, dev_info.tx_desc_lim.nb_max,
+ dev_info.tx_desc_lim.nb_min,
+ dev_info.tx_desc_lim.nb_align);
return -EINVAL;
}
+ if (dev->data->dev_started &&
+ !(dev_info.dev_capa &
+ RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
+ return -EBUSY;
+
+ if (dev->data->dev_started &&
+ (dev->data->tx_queue_state[tx_queue_id] !=
+ RTE_ETH_QUEUE_STATE_STOPPED))
+ return -EBUSY;
+
txq = dev->data->tx_queues;
if (txq[tx_queue_id]) {
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
@@ -1607,19 +1607,35 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
if (tx_conf == NULL)
tx_conf = &dev_info.default_txconf;
+ local_conf = *tx_conf;
+
/*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
+	 * If an offload has already been enabled in
+	 * rte_eth_dev_configure(), it has been enabled on all queues,
+	 * so there is no need to enable it on this queue again.
+	 * The local_conf.offloads input to the underlying PMD only carries
+	 * those offloads which are enabled on this queue only and
+	 * not on all queues.
*/
- local_conf = *tx_conf;
- if (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) {
- rte_eth_convert_txq_offloads(tx_conf->offloads,
- &local_conf.txq_flags);
- /* Keep the ignore flag. */
- local_conf.txq_flags |= ETH_TXQ_FLAGS_IGNORE;
- } else {
- rte_eth_convert_txq_flags(tx_conf->txq_flags,
- &local_conf.offloads);
+ local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads;
+
+ /*
+	 * Offloads newly added for this queue are those not enabled in
+	 * rte_eth_dev_configure() and they must be per-queue offloads.
+	 * A pure per-port offload can't be enabled on a queue while
+	 * disabled on another queue, and it can't be newly enabled on a
+	 * queue if it hasn't been enabled in rte_eth_dev_configure().
+ */
+ if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
+ local_conf.offloads) {
+ RTE_ETHDEV_LOG(ERR,
+			"Ethdev port_id=%d tx_queue_id=%d, newly added offloads 0x%"PRIx64" must be "
+			"within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, tx_queue_id, local_conf.offloads,
+ dev_info.tx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
@@ -1765,20 +1781,6 @@ rte_eth_allmulticast_get(uint16_t port_id)
return dev->data->all_multicast;
}
-static inline int
-rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev,
- struct rte_eth_link *link)
-{
- struct rte_eth_link *dst = link;
- struct rte_eth_link *src = &(dev->data->dev_link);
-
- if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
- *(uint64_t *)src) == 0)
- return -1;
-
- return 0;
-}
-
void
rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link)
{
@@ -1787,8 +1789,9 @@ rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link)
RTE_ETH_VALID_PORTID_OR_RET(port_id);
dev = &rte_eth_devices[port_id];
- if (dev->data->dev_conf.intr_conf.lsc != 0)
- rte_eth_dev_atomic_read_link_status(dev, eth_link);
+ if (dev->data->dev_conf.intr_conf.lsc &&
+ dev->data->dev_started)
+ rte_eth_linkstatus_get(dev, eth_link);
else {
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update);
(*dev->dev_ops->link_update)(dev, 1);
@@ -1804,8 +1807,9 @@ rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link)
RTE_ETH_VALID_PORTID_OR_RET(port_id);
dev = &rte_eth_devices[port_id];
- if (dev->data->dev_conf.intr_conf.lsc != 0)
- rte_eth_dev_atomic_read_link_status(dev, eth_link);
+ if (dev->data->dev_conf.intr_conf.lsc &&
+ dev->data->dev_started)
+ rte_eth_linkstatus_get(dev, eth_link);
else {
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update);
(*dev->dev_ops->link_update)(dev, 0);
@@ -1895,19 +1899,19 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (!id) {
- RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "Id pointer is NULL\n");
return -ENOMEM;
}
if (!xstat_name) {
- RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "xstat_name pointer is NULL\n");
return -ENOMEM;
}
/* Get count */
cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL);
if (cnt_xstats < 0) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get count of xstats\n");
return -ENODEV;
}
@@ -1916,7 +1920,7 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
if (cnt_xstats != rte_eth_xstats_get_names_by_id(
port_id, xstats_names, cnt_xstats, NULL)) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get xstats lookup\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get xstats lookup\n");
return -1;
}
@@ -2039,7 +2043,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
sizeof(struct rte_eth_xstat_name));
if (!xstats_names_copy) {
- RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory");
+ RTE_ETHDEV_LOG(ERR, "Can't allocate memory\n");
return -ENOMEM;
}
@@ -2067,7 +2071,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
free(xstats_names_copy);
return -1;
}
@@ -2252,7 +2256,7 @@ rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
return -1;
}
values[i] = xstats[ids[i]].value;
@@ -2342,6 +2346,16 @@ set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx,
dev = &rte_eth_devices[port_id];
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP);
+
+ if (is_rx && (queue_id >= dev->data->nb_rx_queues))
+ return -EINVAL;
+
+ if (!is_rx && (queue_id >= dev->data->nb_tx_queues))
+ return -EINVAL;
+
+ if (stat_idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+ return -EINVAL;
+
return (*dev->dev_ops->queue_stats_mapping_set)
(dev, queue_id, stat_idx, is_rx);
}
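
set_queue_stats_mapping() now validates the queue id against the configured queue count and the counter index against RTE_ETHDEV_QUEUE_STAT_CNTRS before reaching the PMD. On the application side the usual pattern looks like this sketch, assuming the port is already configured (mapping queue 0 to counter 0 is just an example):

#include <rte_ethdev.h>

/* Sketch: map Rx queue 0 onto per-queue stats counter 0 and read it back. */
static int
map_and_read_rxq0_stats(uint16_t port_id, uint64_t *pkts)
{
	struct rte_eth_stats stats;
	int ret;

	/* stat_idx must be < RTE_ETHDEV_QUEUE_STAT_CNTRS and the queue
	 * must exist, otherwise -EINVAL is returned. */
	ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, 0, 0);
	if (ret < 0)
		return ret;

	ret = rte_eth_stats_get(port_id, &stats);
	if (ret < 0)
		return ret;
	*pkts = stats.q_ipackets[0];
	return 0;
}
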
@@ -2393,12 +2407,15 @@ rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
memset(dev_info, 0, sizeof(struct rte_eth_dev_info));
dev_info->rx_desc_lim = lim;
dev_info->tx_desc_lim = lim;
+ dev_info->device = dev->device;
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
dev_info->driver_name = dev->device->driver->name;
dev_info->nb_rx_queues = dev->data->nb_rx_queues;
dev_info->nb_tx_queues = dev->data->nb_tx_queues;
+
+ dev_info->dev_flags = &dev->data->dev_flags;
}
int
@@ -2477,13 +2494,14 @@ rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on)
dev = &rte_eth_devices[port_id];
if (!(dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER)) {
- RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: vlan-filtering disabled\n",
+ port_id);
return -ENOSYS;
}
if (vlan_id > 4095) {
- RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n",
- port_id, (unsigned) vlan_id);
+ RTE_ETHDEV_LOG(ERR, "Port_id=%u invalid vlan_id=%u > 4095\n",
+ port_id, vlan_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
@@ -2516,7 +2534,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid rx_queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
@@ -2601,19 +2619,10 @@ rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask)
return ret;
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP);
-
- /*
- * Convert to the offload bitfield API just in case the underlying PMD
- * still supporting it.
- */
- rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads,
- &dev->data->dev_conf.rxmode);
ret = (*dev->dev_ops->vlan_offload_set)(dev, mask);
if (ret) {
/* hit an error restore original values */
dev->data->dev_conf.rxmode.offloads = orig_offloads;
- rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads,
- &dev->data->dev_conf.rxmode);
}
return eth_err(port_id, ret);
@@ -2674,7 +2683,7 @@ rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid send_xon, only 0/1 allowed\n");
return -EINVAL;
}
@@ -2691,7 +2700,7 @@ rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid priority, only 0-7 allowed\n");
return -EINVAL;
}
@@ -2732,7 +2741,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
return -EINVAL;
if (max_rxq == 0) {
- RTE_PMD_DEBUG_TRACE("No receive queue is available\n");
+ RTE_ETHDEV_LOG(ERR, "No receive queue is available\n");
return -EINVAL;
}
@@ -2741,8 +2750,9 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
shift = i % RTE_RETA_GROUP_SIZE;
if ((reta_conf[idx].mask & (1ULL << shift)) &&
(reta_conf[idx].reta[shift] >= max_rxq)) {
- RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds "
- "the maximum rxq index: %u\n", idx, shift,
+ RTE_ETHDEV_LOG(ERR,
+ "reta_conf[%u]->reta[%u]: %u exceeds the maximum rxq index: %u\n",
+ idx, shift,
reta_conf[idx].reta[shift], max_rxq);
return -EINVAL;
}
@@ -2804,9 +2814,19 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf)
{
struct rte_eth_dev *dev;
+ struct rte_eth_dev_info dev_info = { .flow_type_rss_offloads = 0, };
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
+ rte_eth_dev_info_get(port_id, &dev_info);
+ if ((dev_info.flow_type_rss_offloads | rss_conf->rss_hf) !=
+ dev_info.flow_type_rss_offloads) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, rss_conf->rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
+ }
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP);
return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
rss_conf));
@@ -2833,12 +2853,12 @@ rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -2858,12 +2878,12 @@ rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id,
dev = &rte_eth_devices[port_id];
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -2931,12 +2951,12 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP);
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
if (pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Pool id must be 0-%d\n", ETH_64_POOLS - 1);
return -EINVAL;
}
@@ -2944,7 +2964,7 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
index = get_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
port_id);
return -ENOSPC;
}
@@ -2982,7 +3002,9 @@ rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr)
index = get_mac_addr_index(port_id, addr);
if (index == 0) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: Cannot remove default MAC address\n",
+ port_id);
return -EADDRINUSE;
} else if (index < 0)
return 0; /* Do nothing if address wasn't found */
@@ -3003,6 +3025,7 @@ int
rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr)
{
struct rte_eth_dev *dev;
+ int ret;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
@@ -3012,11 +3035,13 @@ rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr)
dev = &rte_eth_devices[port_id];
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP);
+ ret = (*dev->dev_ops->mac_addr_set)(dev, addr);
+ if (ret < 0)
+ return ret;
+
/* Update default address in NIC data structure */
ether_addr_copy(addr, &dev->data->mac_addrs[0]);
- (*dev->dev_ops->mac_addr_set)(dev, addr);
-
return 0;
}
@@ -3056,7 +3081,7 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
dev = &rte_eth_devices[port_id];
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
@@ -3068,15 +3093,16 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
if (!on) {
- RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not "
- "set in UTA\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: the MAC address was not set in UTA\n",
+ port_id);
return -EINVAL;
}
index = get_hash_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
+ port_id);
return -ENOSPC;
}
}
@@ -3124,14 +3150,15 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
link = dev->data->dev_link;
if (queue_idx > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: "
- "invalid queue id=%d\n", port_id, queue_idx);
+ RTE_ETHDEV_LOG(ERR,
+			"Set queue rate limit: port %u: invalid queue id=%u\n",
+ port_id, queue_idx);
return -EINVAL;
}
if (tx_rate > link.link_speed) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, "
- "bigger than link speed= %d\n",
+ RTE_ETHDEV_LOG(ERR,
+			"Set queue rate limit: invalid tx_rate=%u, bigger than link speed=%d\n",
tx_rate, link.link_speed);
return -EINVAL;
}
@@ -3150,26 +3177,28 @@ rte_eth_mirror_rule_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (mirror_conf->rule_type == 0) {
- RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR, "Mirror rule type can not be 0\n");
return -EINVAL;
}
if (mirror_conf->dst_pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n",
- ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Invalid dst pool, pool id must be 0-%d\n",
+ ETH_64_POOLS - 1);
return -EINVAL;
}
if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP |
ETH_MIRROR_VIRTUAL_POOL_DOWN)) &&
(mirror_conf->pool_mask == 0)) {
- RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid mirror pool, pool mask can not be 0\n");
return -EINVAL;
}
if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) &&
mirror_conf->vlan.vlan_mask == 0) {
- RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid vlan mask, vlan mask can not be 0\n");
return -EINVAL;
}
@@ -3216,7 +3245,7 @@ rte_eth_dev_callback_register(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- RTE_LOG(ERR, EAL, "Invalid port_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3279,7 +3308,7 @@ rte_eth_dev_callback_unregister(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- RTE_LOG(ERR, EAL, "Invalid port_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3348,6 +3377,17 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
return rc;
}
+void
+rte_eth_dev_probing_finish(struct rte_eth_dev *dev)
+{
+ if (dev == NULL)
+ return;
+
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_NEW, NULL);
+
+ dev->state = RTE_ETH_DEV_ATTACHED;
+}
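
rte_eth_dev_probing_finish() emits RTE_ETH_EVENT_NEW before marking the port attached, so an application can learn about hot-plugged ports by registering a callback for that event on RTE_ETH_ALL. A small sketch of such a callback (the handler body is illustrative):

#include <stdio.h>
#include <rte_ethdev.h>

/* Sketch: log every ethdev port that finishes probing. */
static int
new_port_cb(uint16_t port_id, enum rte_eth_event_type event,
	    void *cb_arg, void *ret_param)
{
	(void)event;
	(void)cb_arg;
	(void)ret_param;
	printf("port %u attached\n", port_id);
	return 0;
}

static void
register_new_port_cb(void)
{
	rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
			new_port_cb, NULL);
}
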
+
int
rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
{
@@ -3362,13 +3402,13 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
dev = &rte_eth_devices[port_id];
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
@@ -3376,9 +3416,9 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
vec = intr_handle->intr_vec[qid];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, qid, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, qid, op, epfd, vec);
}
}
@@ -3401,7 +3441,103 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
if (mz)
return mz;
- return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align);
+ return rte_memzone_reserve_aligned(z_name, size, socket_id,
+ RTE_MEMZONE_IOVA_CONTIG, align);
+}
+
+int __rte_experimental
+rte_eth_dev_create(struct rte_device *device, const char *name,
+ size_t priv_data_size,
+ ethdev_bus_specific_init ethdev_bus_specific_init,
+ void *bus_init_params,
+ ethdev_init_t ethdev_init, void *init_params)
+{
+ struct rte_eth_dev *ethdev;
+ int retval;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*ethdev_init, -EINVAL);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ethdev = rte_eth_dev_allocate(name);
+ if (!ethdev) {
+ retval = -ENODEV;
+ goto probe_failed;
+ }
+
+ if (priv_data_size) {
+ ethdev->data->dev_private = rte_zmalloc_socket(
+ name, priv_data_size, RTE_CACHE_LINE_SIZE,
+ device->numa_node);
+
+ if (!ethdev->data->dev_private) {
+				RTE_LOG(ERR, EAL, "failed to allocate private data\n");
+ retval = -ENOMEM;
+ goto probe_failed;
+ }
+ }
+ } else {
+ ethdev = rte_eth_dev_attach_secondary(name);
+ if (!ethdev) {
+ RTE_LOG(ERR, EAL, "secondary process attach failed, "
+				"ethdev doesn't exist\n");
+ retval = -ENODEV;
+ goto probe_failed;
+ }
+ }
+
+ ethdev->device = device;
+
+ if (ethdev_bus_specific_init) {
+ retval = ethdev_bus_specific_init(ethdev, bus_init_params);
+ if (retval) {
+ RTE_LOG(ERR, EAL,
+				"ethdev bus specific initialisation failed\n");
+ goto probe_failed;
+ }
+ }
+
+ retval = ethdev_init(ethdev, init_params);
+ if (retval) {
+		RTE_LOG(ERR, EAL, "ethdev initialisation failed\n");
+ goto probe_failed;
+ }
+
+ rte_eth_dev_probing_finish(ethdev);
+
+ return retval;
+probe_failed:
+ /* free ports private data if primary process */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(ethdev->data->dev_private);
+
+ rte_eth_dev_release_port(ethdev);
+
+ return retval;
+}
+
+int __rte_experimental
+rte_eth_dev_destroy(struct rte_eth_dev *ethdev,
+ ethdev_uninit_t ethdev_uninit)
+{
+ int ret;
+
+ ethdev = rte_eth_dev_allocated(ethdev->data->name);
+ if (!ethdev)
+ return -ENODEV;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*ethdev_uninit, -EINVAL);
+ if (ethdev_uninit) {
+ ret = ethdev_uninit(ethdev);
+ if (ret)
+ return ret;
+ }
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(ethdev->data->dev_private);
+
+ ethdev->data->dev_private = NULL;
+
+ return rte_eth_dev_release_port(ethdev);
}
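
rte_eth_dev_create() and rte_eth_dev_destroy() factor out the primary/secondary ethdev allocation, private data allocation and optional bus-specific setup that PMDs used to open-code in their probe/remove paths. A sketch of a driver using them; my_priv, my_ethdev_init and my_ethdev_uninit are hypothetical driver symbols:

#include <rte_ethdev_driver.h>

struct my_priv { int dummy; };	/* hypothetical per-port private data */

static int
my_ethdev_init(struct rte_eth_dev *ethdev, void *init_params)
{
	(void)init_params;
	/* Fill in ethdev->dev_ops, MAC addresses, etc. here. */
	(void)ethdev;
	return 0;
}

static int
my_ethdev_uninit(struct rte_eth_dev *ethdev)
{
	(void)ethdev;
	return 0;
}

/* Probe: allocates the ethdev in the primary, attaches in a secondary. */
static int
my_probe(struct rte_device *dev)
{
	return rte_eth_dev_create(dev, dev->name, sizeof(struct my_priv),
			NULL, NULL, my_ethdev_init, NULL);
}

/* Remove: runs the uninit hook, frees private data, releases the port. */
static int
my_remove(struct rte_eth_dev *ethdev)
{
	return rte_eth_dev_destroy(ethdev, my_ethdev_uninit);
}
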
int
@@ -3417,27 +3553,27 @@ rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
vec = intr_handle->intr_vec[queue_id];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, queue_id, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, queue_id, op, epfd, vec);
return rc;
}
@@ -3490,153 +3626,8 @@ rte_eth_dev_filter_supported(uint16_t port_id,
}
int
-rte_eth_dev_filter_ctrl_v22(uint16_t port_id,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op, void *arg);
-
-int
-rte_eth_dev_filter_ctrl_v22(uint16_t port_id,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op, void *arg)
-{
- struct rte_eth_fdir_info_v22 {
- enum rte_fdir_mode mode;
- struct rte_eth_fdir_masks mask;
- struct rte_eth_fdir_flex_conf flex_conf;
- uint32_t guarant_spc;
- uint32_t best_spc;
- uint32_t flow_types_mask[1];
- uint32_t max_flexpayload;
- uint32_t flex_payload_unit;
- uint32_t max_flex_payload_segment_num;
- uint16_t flex_payload_limit;
- uint32_t flex_bitmask_unit;
- uint32_t max_flex_bitmask_num;
- };
-
- struct rte_eth_hash_global_conf_v22 {
- enum rte_eth_hash_function hash_func;
- uint32_t sym_hash_enable_mask[1];
- uint32_t valid_bit_mask[1];
- };
-
- struct rte_eth_hash_filter_info_v22 {
- enum rte_eth_hash_filter_info_type info_type;
- union {
- uint8_t enable;
- struct rte_eth_hash_global_conf_v22 global_conf;
- struct rte_eth_input_set_conf input_set_conf;
- } info;
- };
-
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
- if (filter_op == RTE_ETH_FILTER_INFO) {
- int retval;
- struct rte_eth_fdir_info_v22 *fdir_info_v22;
- struct rte_eth_fdir_info fdir_info;
-
- fdir_info_v22 = (struct rte_eth_fdir_info_v22 *)arg;
-
- retval = (*dev->dev_ops->filter_ctrl)(dev, filter_type,
- filter_op, (void *)&fdir_info);
- fdir_info_v22->mode = fdir_info.mode;
- fdir_info_v22->mask = fdir_info.mask;
- fdir_info_v22->flex_conf = fdir_info.flex_conf;
- fdir_info_v22->guarant_spc = fdir_info.guarant_spc;
- fdir_info_v22->best_spc = fdir_info.best_spc;
- fdir_info_v22->flow_types_mask[0] =
- (uint32_t)fdir_info.flow_types_mask[0];
- fdir_info_v22->max_flexpayload = fdir_info.max_flexpayload;
- fdir_info_v22->flex_payload_unit = fdir_info.flex_payload_unit;
- fdir_info_v22->max_flex_payload_segment_num =
- fdir_info.max_flex_payload_segment_num;
- fdir_info_v22->flex_payload_limit =
- fdir_info.flex_payload_limit;
- fdir_info_v22->flex_bitmask_unit = fdir_info.flex_bitmask_unit;
- fdir_info_v22->max_flex_bitmask_num =
- fdir_info.max_flex_bitmask_num;
- return retval;
- } else if (filter_op == RTE_ETH_FILTER_GET) {
- int retval;
- struct rte_eth_hash_filter_info f_info;
- struct rte_eth_hash_filter_info_v22 *f_info_v22 =
- (struct rte_eth_hash_filter_info_v22 *)arg;
-
- f_info.info_type = f_info_v22->info_type;
- retval = (*dev->dev_ops->filter_ctrl)(dev, filter_type,
- filter_op, (void *)&f_info);
-
- switch (f_info_v22->info_type) {
- case RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT:
- f_info_v22->info.enable = f_info.info.enable;
- break;
- case RTE_ETH_HASH_FILTER_GLOBAL_CONFIG:
- f_info_v22->info.global_conf.hash_func =
- f_info.info.global_conf.hash_func;
- f_info_v22->info.global_conf.sym_hash_enable_mask[0] =
- (uint32_t)
- f_info.info.global_conf.sym_hash_enable_mask[0];
- f_info_v22->info.global_conf.valid_bit_mask[0] =
- (uint32_t)
- f_info.info.global_conf.valid_bit_mask[0];
- break;
- case RTE_ETH_HASH_FILTER_INPUT_SET_SELECT:
- f_info_v22->info.input_set_conf =
- f_info.info.input_set_conf;
- break;
- default:
- break;
- }
- return retval;
- } else if (filter_op == RTE_ETH_FILTER_SET) {
- struct rte_eth_hash_filter_info f_info;
- struct rte_eth_hash_filter_info_v22 *f_v22 =
- (struct rte_eth_hash_filter_info_v22 *)arg;
-
- f_info.info_type = f_v22->info_type;
- switch (f_v22->info_type) {
- case RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT:
- f_info.info.enable = f_v22->info.enable;
- break;
- case RTE_ETH_HASH_FILTER_GLOBAL_CONFIG:
- f_info.info.global_conf.hash_func =
- f_v22->info.global_conf.hash_func;
- f_info.info.global_conf.sym_hash_enable_mask[0] =
- (uint32_t)
- f_v22->info.global_conf.sym_hash_enable_mask[0];
- f_info.info.global_conf.valid_bit_mask[0] =
- (uint32_t)
- f_v22->info.global_conf.valid_bit_mask[0];
- break;
- case RTE_ETH_HASH_FILTER_INPUT_SET_SELECT:
- f_info.info.input_set_conf =
- f_v22->info.input_set_conf;
- break;
- default:
- break;
- }
- return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op,
- (void *)&f_info);
- } else
- return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op,
- arg);
-}
-VERSION_SYMBOL(rte_eth_dev_filter_ctrl, _v22, 2.2);
-
-int
-rte_eth_dev_filter_ctrl_v1802(uint16_t port_id,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op, void *arg);
-
-int
-rte_eth_dev_filter_ctrl_v1802(uint16_t port_id,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op, void *arg)
+rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op, void *arg)
{
struct rte_eth_dev *dev;
@@ -3647,13 +3638,8 @@ rte_eth_dev_filter_ctrl_v1802(uint16_t port_id,
return eth_err(port_id, (*dev->dev_ops->filter_ctrl)(dev, filter_type,
filter_op, arg));
}
-BIND_DEFAULT_SYMBOL(rte_eth_dev_filter_ctrl, _v1802, 18.02);
-MAP_STATIC_SYMBOL(int rte_eth_dev_filter_ctrl(uint16_t port_id,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op, void *arg),
- rte_eth_dev_filter_ctrl_v1802);
-void *
+const struct rte_eth_rxtx_callback *
rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param)
{
@@ -3695,7 +3681,7 @@ rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
return cb;
}
-void *
+const struct rte_eth_rxtx_callback *
rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param)
{
@@ -3730,7 +3716,7 @@ rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
return cb;
}
-void *
+const struct rte_eth_rxtx_callback *
rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
rte_tx_callback_fn fn, void *user_param)
{
@@ -3775,7 +3761,7 @@ rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
int
rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
- struct rte_eth_rxtx_callback *user_cb)
+ const struct rte_eth_rxtx_callback *user_cb)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
return -ENOTSUP;
@@ -3809,7 +3795,7 @@ rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
int
rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
- struct rte_eth_rxtx_callback *user_cb)
+ const struct rte_eth_rxtx_callback *user_cb)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
return -ENOTSUP;
@@ -3854,7 +3840,7 @@ rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3878,7 +3864,7 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3886,6 +3872,7 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
memset(qinfo, 0, sizeof(*qinfo));
dev->dev_ops->txq_info_get(dev, queue_id, qinfo);
+
return 0;
}
@@ -4043,6 +4030,32 @@ rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info)
return eth_err(port_id, (*dev->dev_ops->set_eeprom)(dev, info));
}
+int __rte_experimental
+rte_eth_dev_get_module_info(uint16_t port_id,
+ struct rte_eth_dev_module_info *modinfo)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ dev = &rte_eth_devices[port_id];
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_info, -ENOTSUP);
+ return (*dev->dev_ops->get_module_info)(dev, modinfo);
+}
+
+int __rte_experimental
+rte_eth_dev_get_module_eeprom(uint16_t port_id,
+ struct rte_dev_eeprom_info *info)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ dev = &rte_eth_devices[port_id];
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_eeprom, -ENOTSUP);
+ return (*dev->dev_ops->get_module_eeprom)(dev, info);
+}
+
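
The two experimental module calls are meant to be used as a pair: query the plugged module's type and EEPROM size first, then read that many bytes. An illustrative sketch, assuming the 18.08 field names (type/eeprom_len on rte_eth_dev_module_info, data/offset/length on rte_dev_eeprom_info) and a caller-provided buffer:

#include <errno.h>
#include <string.h>
#include <rte_ethdev.h>

/* Sketch: read the transceiver EEPROM of 'port_id' into 'buf'. */
static int
read_module_eeprom(uint16_t port_id, uint8_t *buf, uint32_t buf_len)
{
	struct rte_eth_dev_module_info modinfo;
	struct rte_dev_eeprom_info eeprom;
	int ret;

	ret = rte_eth_dev_get_module_info(port_id, &modinfo);
	if (ret < 0)
		return ret;
	if (modinfo.eeprom_len > buf_len)
		return -ENOSPC;

	memset(&eeprom, 0, sizeof(eeprom));
	eeprom.data = buf;
	eeprom.offset = 0;
	eeprom.length = modinfo.eeprom_len;
	return rte_eth_dev_get_module_eeprom(port_id, &eeprom);
}
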
int
rte_eth_dev_get_dcb_info(uint16_t port_id,
struct rte_eth_dcb_info *dcb_info)
@@ -4066,12 +4079,12 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -4093,17 +4106,17 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
if (mask == 0) {
- RTE_PMD_DEBUG_TRACE("Mask should have a value.\n");
+ RTE_ETHDEV_LOG(ERR, "Mask should have a value\n");
return -EINVAL;
}
@@ -4168,3 +4181,245 @@ rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
return (*dev->dev_ops->pool_ops_supported)(dev, pool);
}
+
+/**
+ * A set of values to describe the possible states of a switch domain.
+ */
+enum rte_eth_switch_domain_state {
+ RTE_ETH_SWITCH_DOMAIN_UNUSED = 0,
+ RTE_ETH_SWITCH_DOMAIN_ALLOCATED
+};
+
+/**
+ * Array of switch domains available for allocation. Array is sized to
+ * RTE_MAX_ETHPORTS elements as there cannot be more active switch domains than
+ * ethdev ports in a single process.
+ */
+struct rte_eth_dev_switch {
+ enum rte_eth_switch_domain_state state;
+} rte_eth_switch_domains[RTE_MAX_ETHPORTS];
+
+int __rte_experimental
+rte_eth_switch_domain_alloc(uint16_t *domain_id)
+{
+ unsigned int i;
+
+ *domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+
+ for (i = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID + 1;
+ i < RTE_MAX_ETHPORTS; i++) {
+ if (rte_eth_switch_domains[i].state ==
+ RTE_ETH_SWITCH_DOMAIN_UNUSED) {
+ rte_eth_switch_domains[i].state =
+ RTE_ETH_SWITCH_DOMAIN_ALLOCATED;
+ *domain_id = i;
+ return 0;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+int __rte_experimental
+rte_eth_switch_domain_free(uint16_t domain_id)
+{
+ if (domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID ||
+ domain_id >= RTE_MAX_ETHPORTS)
+ return -EINVAL;
+
+ if (rte_eth_switch_domains[domain_id].state !=
+ RTE_ETH_SWITCH_DOMAIN_ALLOCATED)
+ return -EINVAL;
+
+ rte_eth_switch_domains[domain_id].state = RTE_ETH_SWITCH_DOMAIN_UNUSED;
+
+ return 0;
+}
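
Switch domain ids are handed out from a fixed RTE_MAX_ETHPORTS-sized table, so a representor-aware PMD would typically allocate one per physical switch at probe time and release it again on remove. A minimal sketch of that pairing (the wrapper names are illustrative):

#include <rte_ethdev_driver.h>
#include <rte_log.h>

static uint16_t my_switch_domain = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;

/* Sketch: reserve a switch domain id for a group of representor ports. */
static int
my_switch_domain_init(void)
{
	int ret = rte_eth_switch_domain_alloc(&my_switch_domain);

	if (ret < 0)
		RTE_LOG(ERR, EAL, "no free switch domain (%d)\n", ret);
	return ret;
}

/* Release it when the last port of the group is removed. */
static void
my_switch_domain_fini(void)
{
	if (my_switch_domain != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
		rte_eth_switch_domain_free(my_switch_domain);
}
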
+
+typedef int (*rte_eth_devargs_callback_t)(char *str, void *data);
+
+static int
+rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in)
+{
+ int state;
+ struct rte_kvargs_pair *pair;
+ char *letter;
+
+ arglist->str = strdup(str_in);
+ if (arglist->str == NULL)
+ return -ENOMEM;
+
+ letter = arglist->str;
+ state = 0;
+ arglist->count = 0;
+ pair = &arglist->pairs[0];
+ while (1) {
+ switch (state) {
+ case 0: /* Initial */
+ if (*letter == '=')
+ return -EINVAL;
+ else if (*letter == '\0')
+ return 0;
+
+ state = 1;
+ pair->key = letter;
+ /* fall-thru */
+
+ case 1: /* Parsing key */
+ if (*letter == '=') {
+ *letter = '\0';
+ pair->value = letter + 1;
+ state = 2;
+ } else if (*letter == ',' || *letter == '\0')
+ return -EINVAL;
+ break;
+
+
+ case 2: /* Parsing value */
+ if (*letter == '[')
+ state = 3;
+ else if (*letter == ',') {
+ *letter = '\0';
+ arglist->count++;
+ pair = &arglist->pairs[arglist->count];
+ state = 0;
+ } else if (*letter == '\0') {
+ letter--;
+ arglist->count++;
+ pair = &arglist->pairs[arglist->count];
+ state = 0;
+ }
+ break;
+
+ case 3: /* Parsing list */
+ if (*letter == ']')
+ state = 2;
+ else if (*letter == '\0')
+ return -EINVAL;
+ break;
+ }
+ letter++;
+ }
+}
+
+static int
+rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback,
+ void *data)
+{
+ char *str_start;
+ int state;
+ int result;
+
+ if (*str != '[')
+ /* Single element, not a list */
+ return callback(str, data);
+
+ /* Sanity check, then strip the brackets */
+ str_start = &str[strlen(str) - 1];
+ if (*str_start != ']') {
+		RTE_LOG(ERR, EAL, "(%s): List does not end with ']'\n", str);
+ return -EINVAL;
+ }
+ str++;
+ *str_start = '\0';
+
+ /* Process list elements */
+ state = 0;
+ while (1) {
+ if (state == 0) {
+ if (*str == '\0')
+ break;
+ if (*str != ',') {
+ str_start = str;
+ state = 1;
+ }
+ } else if (state == 1) {
+ if (*str == ',' || *str == '\0') {
+ if (str > str_start) {
+ /* Non-empty string fragment */
+ *str = '\0';
+ result = callback(str_start, data);
+ if (result < 0)
+ return result;
+ }
+ state = 0;
+ }
+ }
+ str++;
+ }
+ return 0;
+}
+
+static int
+rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list,
+ const uint16_t max_list)
+{
+ uint16_t lo, hi, val;
+ int result;
+
+ result = sscanf(str, "%hu-%hu", &lo, &hi);
+ if (result == 1) {
+ if (*len_list >= max_list)
+ return -ENOMEM;
+ list[(*len_list)++] = lo;
+ } else if (result == 2) {
+ if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS)
+ return -EINVAL;
+ for (val = lo; val <= hi; val++) {
+ if (*len_list >= max_list)
+ return -ENOMEM;
+ list[(*len_list)++] = val;
+ }
+ } else
+ return -EINVAL;
+ return 0;
+}
+
+
+static int
+rte_eth_devargs_parse_representor_ports(char *str, void *data)
+{
+ struct rte_eth_devargs *eth_da = data;
+
+ return rte_eth_devargs_process_range(str, eth_da->representor_ports,
+ &eth_da->nb_representor_ports, RTE_MAX_ETHPORTS);
+}
+
+int __rte_experimental
+rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
+{
+ struct rte_kvargs args;
+ struct rte_kvargs_pair *pair;
+ unsigned int i;
+ int result = 0;
+
+ memset(eth_da, 0, sizeof(*eth_da));
+
+ result = rte_eth_devargs_tokenise(&args, dargs);
+ if (result < 0)
+ goto parse_cleanup;
+
+ for (i = 0; i < args.count; i++) {
+ pair = &args.pairs[i];
+ if (strcmp("representor", pair->key) == 0) {
+ result = rte_eth_devargs_parse_list(pair->value,
+ rte_eth_devargs_parse_representor_ports,
+ eth_da);
+ if (result < 0)
+ goto parse_cleanup;
+ }
+ }
+
+parse_cleanup:
+ if (args.str)
+ free(args.str);
+
+ return result;
+}
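
rte_eth_devargs_parse() tokenises a device argument string and currently understands the "representor" key, accepting single values, bracketed lists and ranges. An illustrative sketch of parsing such a string and walking the resulting port list (field names taken from the code above; the helper name is an assumption):

#include <stdio.h>
#include <rte_ethdev_driver.h>

/* Sketch: extract the requested representor ports from a devargs string,
 * e.g. parse_representors("representor=[0,2-3]"). */
static int
parse_representors(const char *devargs)
{
	struct rte_eth_devargs eth_da;
	uint16_t i;
	int ret;

	ret = rte_eth_devargs_parse(devargs, &eth_da);
	if (ret < 0)
		return ret;
	for (i = 0; i < eth_da.nb_representor_ports; i++)
		printf("representor port %u requested\n",
		       eth_da.representor_ports[i]);
	return 0;
}
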
+
+RTE_INIT(ethdev_init_log)
+{
+ rte_eth_dev_logtype = rte_log_register("lib.ethdev");
+ if (rte_eth_dev_logtype >= 0)
+ rte_log_set_level(rte_eth_dev_logtype, RTE_LOG_INFO);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 03615330..7070e9ab 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -159,6 +159,11 @@ extern "C" {
#include "rte_eth_ctrl.h"
#include "rte_dev_info.h"
+extern int rte_eth_dev_logtype;
+
+#define RTE_ETHDEV_LOG(level, ...) \
+ rte_log(RTE_LOG_ ## level, rte_eth_dev_logtype, "" __VA_ARGS__)
+
struct rte_mbuf;
/**
@@ -321,7 +326,7 @@ enum rte_eth_tx_mq_mode {
struct rte_eth_rxmode {
/** The multi-queue packet distribution mode to be used, e.g. RSS. */
enum rte_eth_rx_mq_mode mq_mode;
- uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
+ uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
/**
* Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags.
@@ -329,33 +334,6 @@ struct rte_eth_rxmode {
* structure are allowed to be set.
*/
uint64_t offloads;
- __extension__
- /**
- * Below bitfield API is obsolete. Application should
- * enable per-port offloads using the offload field
- * above.
- */
- uint16_t header_split : 1, /**< Header Split enable. */
- hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
- hw_vlan_filter : 1, /**< VLAN filter enable. */
- hw_vlan_strip : 1, /**< VLAN strip enable. */
- hw_vlan_extend : 1, /**< Extended VLAN enable. */
- jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
- hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
- enable_scatter : 1, /**< Enable scatter packets rx handler */
- enable_lro : 1, /**< Enable LRO */
- hw_timestamp : 1, /**< Enable HW timestamp */
- security : 1, /**< Enable rte_security offloads */
- /**
- * When set the offload bitfield should be ignored.
- * Instead per-port Rx offloads should be set on offloads
- * field above.
- * Per-queue offloads shuold be set on rte_eth_rxq_conf
- * structure.
- * This bit is temporary till rxmode bitfield offloads API will
- * be deprecated.
- */
- ignore_offload_bitfield : 1;
};
/**
@@ -702,28 +680,6 @@ struct rte_eth_rxconf {
uint64_t offloads;
};
-#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
-#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */
-#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */
-#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */
-#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */
-#define ETH_TXQ_FLAGS_NOOFFLOADS \
- (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
- ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP)
-#define ETH_TXQ_FLAGS_NOXSUMS \
- (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
- ETH_TXQ_FLAGS_NOXSUMTCP)
-/**
- * When set the txq_flags should be ignored,
- * instead per-queue Tx offloads will be set on offloads field
- * located on rte_eth_txq_conf struct.
- * This flag is temporary till the rte_eth_txq_conf.txq_flags
- * API will be deprecated.
- */
-#define ETH_TXQ_FLAGS_IGNORE 0x8000
-
/**
* A structure used to configure a TX ring of an Ethernet port.
*/
@@ -733,7 +689,6 @@ struct rte_eth_txconf {
uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are
less free descriptors than this value. */
- uint32_t txq_flags; /**< Set flags for the Tx queue */
uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
/**
* Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags.
@@ -939,6 +894,12 @@ struct rte_eth_conf {
#define DEV_RX_OFFLOAD_SCATTER 0x00002000
#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000
#define DEV_RX_OFFLOAD_SECURITY 0x00008000
+
+/**
+ * Setting both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC is invalid.
+ * When DEV_RX_OFFLOAD_CRC_STRIP is not set, the CRC is kept.
+ */
+#define DEV_RX_OFFLOAD_KEEP_CRC 0x00010000
#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \
DEV_RX_OFFLOAD_UDP_CKSUM | \
DEV_RX_OFFLOAD_TCP_CKSUM)
@@ -980,22 +941,81 @@ struct rte_eth_conf {
* the same mempool and has refcnt = 1.
*/
#define DEV_TX_OFFLOAD_SECURITY 0x00020000
+/**
+ * Device supports generic UDP tunneled packet TSO.
+ * Application must set PKT_TX_TUNNEL_UDP and other mbuf fields required
+ * for tunnel TSO.
+ */
+#define DEV_TX_OFFLOAD_UDP_TNL_TSO 0x00040000
+/**
+ * Device supports generic IP tunneled packet TSO.
+ * Application must set PKT_TX_TUNNEL_IP and other mbuf fields required
+ * for tunnel TSO.
+ */
+#define DEV_TX_OFFLOAD_IP_TNL_TSO 0x00080000
+
+#define RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP 0x00000001
+/**< Device supports Rx queue setup after the device has been started. */
+#define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
+/**< Device supports Tx queue setup after the device has been started. */
/*
* If new Tx offload capabilities are defined, they also must be
* mentioned in rte_tx_offload_names in rte_ethdev.c file.
*/
-struct rte_pci_device;
+/*
+ * Fallback default preferred Rx/Tx port parameters.
+ * These are used if an application requests default parameters
+ * but the PMD does not provide preferred values.
+ */
+#define RTE_ETH_DEV_FALLBACK_RX_RINGSIZE 512
+#define RTE_ETH_DEV_FALLBACK_TX_RINGSIZE 512
+#define RTE_ETH_DEV_FALLBACK_RX_NBQUEUES 1
+#define RTE_ETH_DEV_FALLBACK_TX_NBQUEUES 1
+
+/**
+ * Preferred Rx/Tx port parameters.
+ * There are separate instances of this structure for transmission
+ * and reception respectively.
+ */
+struct rte_eth_dev_portconf {
+ uint16_t burst_size; /**< Device-preferred burst size */
+ uint16_t ring_size; /**< Device-preferred size of queue rings */
+ uint16_t nb_queues; /**< Device-preferred number of queues */
+};
+
+/**
+ * Default value for the switch domain id when the ethdev does not support
+ * switch domain definitions.
+ */
+#define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID (0)
+
+/**
+ * Ethernet device associated switch information
+ */
+struct rte_eth_switch_info {
+ const char *name; /**< switch name */
+ uint16_t domain_id; /**< switch domain id */
+ uint16_t port_id;
+ /**<
+	 * Mapping to the device's physical switch port as enumerated from the
+	 * perspective of the embedded interconnect/switch. For an SR-IOV
+	 * enabled device this may correspond to the VF_ID of each virtual
+	 * function, but each driver should explicitly define the mapping of
+	 * the switch port identifier to that physical interconnect/switch.
+ */
+};
/**
* Ethernet device information
*/
struct rte_eth_dev_info {
- struct rte_pci_device *pci_dev; /**< Device PCI information. */
+	struct rte_device *device; /**< Generic device information */
const char *driver_name; /**< Device Driver name. */
unsigned int if_index; /**< Index to bound host interface, or 0 if none.
Use if_indextoname() to translate into an interface name. */
+ const uint32_t *dev_flags; /**< Device flags */
uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */
uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */
uint16_t max_rx_queues; /**< Maximum number of RX queues. */
@@ -1006,13 +1026,13 @@ struct rte_eth_dev_info {
uint16_t max_vfs; /**< Maximum number of VFs. */
uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */
uint64_t rx_offload_capa;
- /**< Device per port RX offload capabilities. */
+ /**< All RX offload capabilities including all per-queue ones */
uint64_t tx_offload_capa;
- /**< Device per port TX offload capabilities. */
+ /**< All TX offload capabilities including all per-queue ones */
uint64_t rx_queue_offload_capa;
- /**< Device per queue RX offload capabilities. */
+ /**< Device per-queue RX offload capabilities. */
uint64_t tx_queue_offload_capa;
- /**< Device per queue TX offload capabilities. */
+ /**< Device per-queue TX offload capabilities. */
uint16_t reta_size;
/**< Device redirection table size, the total number of entries. */
uint8_t hash_key_size; /**< Hash key size in bytes */
@@ -1029,6 +1049,17 @@ struct rte_eth_dev_info {
/** Configured number of rx/tx queues */
uint16_t nb_rx_queues; /**< Number of RX queues. */
uint16_t nb_tx_queues; /**< Number of TX queues. */
+ /** Rx parameter recommendations */
+ struct rte_eth_dev_portconf default_rxportconf;
+ /** Tx parameter recommendations */
+ struct rte_eth_dev_portconf default_txportconf;
+ /** Generic device capabilities (RTE_ETH_DEV_CAPA_). */
+ uint64_t dev_capa;
+ /**
+	 * Switching information for ports on a device with an
+	 * embedded managed interconnect/switch.
+ */
+ struct rte_eth_switch_info switch_info;
};
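/*
 * A minimal usage sketch (illustrative, not part of the patch): choosing an
 * Rx ring size from the new recommendations. A PMD with no preference
 * leaves the field at 0, so the application falls back to its own default,
 * here the RTE_ETH_DEV_FALLBACK_RX_RINGSIZE constant defined above.
 */
static uint16_t
example_pick_rx_ring_size(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	if (dev_info.default_rxportconf.ring_size != 0)
		return dev_info.default_rxportconf.ring_size;
	return RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
}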
/**
@@ -1117,21 +1148,19 @@ struct rte_eth_dcb_info {
#define RTE_ETH_QUEUE_STATE_STOPPED 0
#define RTE_ETH_QUEUE_STATE_STARTED 1
-struct rte_eth_dev;
-
#define RTE_ETH_ALL RTE_MAX_ETHPORTS
/* Macros to check for valid port */
#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return retval; \
} \
} while (0)
#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return; \
} \
} while (0)
@@ -1199,12 +1228,16 @@ typedef uint16_t (*rte_tx_callback_fn)(uint16_t port_id, uint16_t queue,
struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
/**
- * A set of values to describe the possible states of an eth device.
+ * Possible states of an ethdev port.
*/
enum rte_eth_dev_state {
+ /** Device is unused before being probed. */
RTE_ETH_DEV_UNUSED = 0,
+	/** Device is attached once it has been allocated during probing. */
RTE_ETH_DEV_ATTACHED,
+	/** The deferred state is obsolete; it has been replaced by ownership. */
RTE_ETH_DEV_DEFERRED,
+	/** Device is in the removed state once plug-out has been detected. */
RTE_ETH_DEV_REMOVED,
};
@@ -1233,11 +1266,10 @@ struct rte_eth_dev_owner {
#define RTE_ETH_DEV_BONDED_SLAVE 0x0004
/** Device supports device removal interrupt */
#define RTE_ETH_DEV_INTR_RMV 0x0008
+/** Device is port representor */
+#define RTE_ETH_DEV_REPRESENTOR 0x0010
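/*
 * A minimal usage sketch (illustrative, not part of the patch): detecting a
 * representor port through the dev_flags pointer added to
 * struct rte_eth_dev_info above. The example_ helper is hypothetical.
 */
static int
example_is_representor(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	return (*dev_info.dev_flags & RTE_ETH_DEV_REPRESENTOR) != 0;
}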
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
* Iterates over valid ethdev ports owned by a specific owner.
*
* @param port_id
@@ -1248,7 +1280,7 @@ struct rte_eth_dev_owner {
* @return
* Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none.
*/
-uint64_t __rte_experimental rte_eth_find_next_owned_by(uint16_t port_id,
+uint64_t rte_eth_find_next_owned_by(uint16_t port_id,
const uint64_t owner_id);
/**
@@ -1362,9 +1394,32 @@ int __rte_experimental rte_eth_dev_owner_get(const uint16_t port_id,
* @return
* - The total number of usable Ethernet devices.
*/
+__rte_deprecated
uint16_t rte_eth_dev_count(void);
/**
+ * Get the number of ports which are usable for the application.
+ *
+ * These devices must be iterated by using the macro
+ * ``RTE_ETH_FOREACH_DEV`` or ``RTE_ETH_FOREACH_DEV_OWNED_BY``
+ * to deal with non-contiguous ranges of devices.
+ *
+ * @return
+ * The count of available Ethernet devices.
+ */
+uint16_t rte_eth_dev_count_avail(void);
+
+/**
+ * Get the total number of ports which are allocated.
+ *
+ * Some devices may not be available for the application.
+ *
+ * @return
+ * The total count of Ethernet devices.
+ */
+uint16_t __rte_experimental rte_eth_dev_count_total(void);
+
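/*
 * A minimal usage sketch (illustrative, not part of the patch): the
 * replacement pattern for the deprecated rte_eth_dev_count(). Ports owned
 * by other entities make the valid port ids non-contiguous, so iteration
 * uses RTE_ETH_FOREACH_DEV rather than a plain 0..count loop.
 */
static void
example_list_ports(void)
{
	uint16_t port_id;

	printf("%u of %u allocated ports are usable\n",
		(unsigned int)rte_eth_dev_count_avail(),
		(unsigned int)rte_eth_dev_count_total());

	RTE_ETH_FOREACH_DEV(port_id)
		printf("port %u is usable\n", (unsigned int)port_id);
}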
+/**
* Attach a new Ethernet device specified by arguments.
*
* @param devargs
@@ -1376,6 +1431,7 @@ uint16_t rte_eth_dev_count(void);
* @return
* 0 on success and port_id is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
/**
@@ -1391,6 +1447,7 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
* @return
* 0 on success and devname is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_detach(uint16_t port_id, char *devname);
/**
@@ -1454,8 +1511,15 @@ const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload);
* The Rx offload bitfield API is obsolete and will be deprecated.
* Applications should set the ignore_bitfield_offloads bit on *rxmode*
* structure and use offloads field to set per-port offloads instead.
- * - the Receive Side Scaling (RSS) configuration when using multiple RX
- * queues per port.
+ * - Any offload set in eth_conf->[rt]xmode.offloads must be within
+ *   the [rt]x_offload_capa returned from rte_eth_dev_infos_get().
+ *   Any device-supported offload set in eth_conf->[rt]xmode.offloads
+ *   when calling rte_eth_dev_configure() is enabled on all queues and
+ *   cannot be disabled in rte_eth_[rt]x_queue_setup().
+ * - the Receive Side Scaling (RSS) configuration when using multiple RX
+ *   queues per port. Any RSS hash function set in eth_conf->rss_conf.rss_hf
+ *   must be within the flow_type_rss_offloads reported by the driver via
+ *   the rte_eth_dev_infos_get() API.
*
* Embedding all configuration information in a single data structure
* is the more flexible method that allows the addition of new features
@@ -1510,6 +1574,13 @@ rte_eth_dev_is_removed(uint16_t port_id);
* ring.
* In addition it contains the hardware offloads features to activate using
* the DEV_RX_OFFLOAD_* flags.
+ *   If an offload set in rx_conf->offloads was not set in the
+ *   eth_conf->rxmode.offloads argument to rte_eth_dev_configure(),
+ *   it is a newly added, per-queue offload and it is enabled only for
+ *   this queue.
+ *   There is no need to repeat any bit in rx_conf->offloads that has
+ *   already been enabled at port level in rte_eth_dev_configure();
+ *   an offload enabled at port level cannot be disabled at queue level.
* @param mb_pool
* The pointer to the memory pool from which to allocate *rte_mbuf* network
* memory buffers to populate each descriptor of the receive ring.
@@ -1561,14 +1632,14 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
* The *tx_rs_thresh* value should be less or equal then
* *tx_free_thresh* value, and both of them should be less then
* *nb_tx_desc* - 3.
- * - The *txq_flags* member contains flags to pass to the TX queue setup
- * function to configure the behavior of the TX queue. This should be set
- * to 0 if no special configuration is required.
- * This API is obsolete and will be deprecated. Applications
- * should set it to ETH_TXQ_FLAGS_IGNORE and use
- * the offloads field below.
* - The *offloads* member contains Tx offloads to be enabled.
- * Offloads which are not set cannot be used on the datapath.
+ *   If an offload set in tx_conf->offloads was not set in the
+ *   eth_conf->txmode.offloads argument to rte_eth_dev_configure(),
+ *   it is a newly added, per-queue offload and it is enabled only for
+ *   this queue.
+ *   There is no need to repeat any bit in tx_conf->offloads that has
+ *   already been enabled at port level in rte_eth_dev_configure();
+ *   an offload enabled at port level cannot be disabled at queue level.
*
* Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces
* the transmit function to use default values.
@@ -1889,6 +1960,15 @@ int rte_eth_stats_reset(uint16_t port_id);
/**
* Retrieve names of extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ *  xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the id field of
+ * 'struct rte_eth_xstat':
+ *  xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats_names
@@ -1913,13 +1993,20 @@ int rte_eth_xstats_get_names(uint16_t port_id,
/**
* Retrieve extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ *  xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the id field of
+ * 'struct rte_eth_xstat':
+ *  xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats
* A pointer to a table of structure of type *rte_eth_xstat*
- * to be filled with device statistics ids and values: id is the
- * index of the name string in xstats_names (see rte_eth_xstats_get_names()),
- * and value is the statistic counter.
+ * to be filled with device statistics ids and values.
* This parameter can be set to NULL if n is 0.
* @param n
* The size of the xstats array (number of elements).
@@ -2030,7 +2117,7 @@ void rte_eth_xstats_reset(uint16_t port_id);
* @param stat_idx
* The per-queue packet statistics functionality number that the transmit
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -2050,7 +2137,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
* @param stat_idx
* The per-queue packet statistics functionality number that the receive
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -2438,6 +2525,46 @@ int
rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt);
/**
+ * Subtypes for the IPsec offload event (@ref RTE_ETH_EVENT_IPSEC) raised by
+ * the eth device.
+ */
+enum rte_eth_event_ipsec_subtype {
+ RTE_ETH_EVENT_IPSEC_UNKNOWN = 0,
+ /**< Unknown event type */
+ RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW,
+ /**< Sequence number overflow */
+ RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY,
+ /**< Soft time expiry of SA */
+ RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY,
+ /**< Soft byte expiry of SA */
+ RTE_ETH_EVENT_IPSEC_MAX
+ /**< Max value of this enum */
+};
+
+/**
+ * Descriptor for the @ref RTE_ETH_EVENT_IPSEC event. Used by the eth dev to
+ * send extra information about the IPsec offload event.
+ */
+struct rte_eth_event_ipsec_desc {
+ enum rte_eth_event_ipsec_subtype subtype;
+ /**< Type of RTE_ETH_EVENT_IPSEC_* event */
+ uint64_t metadata;
+ /**< Event specific metadata
+ *
+ * For the following events, *userdata* registered
+ * with the *rte_security_session* would be returned
+ * as metadata,
+ *
+ * - @ref RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW
+ * - @ref RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY
+ * - @ref RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY
+ *
+ * @see struct rte_security_session_conf
+ *
+ */
+};
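/*
 * A minimal usage sketch (illustrative, not part of the patch): an
 * application callback for the new event type. It assumes, per the
 * descriptor above, that the PMD passes a struct rte_eth_event_ipsec_desc
 * through ret_param; the example_ function name is hypothetical.
 */
static int
example_ipsec_event_cb(uint16_t port_id, enum rte_eth_event_type event,
			void *cb_arg, void *ret_param)
{
	struct rte_eth_event_ipsec_desc *desc = ret_param;

	RTE_SET_USED(cb_arg);
	if (event != RTE_ETH_EVENT_IPSEC || desc == NULL)
		return 0;
	if (desc->subtype == RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW)
		printf("port %u: ESN overflow, metadata 0x%llx\n",
			(unsigned int)port_id,
			(unsigned long long)desc->metadata);
	return 0;
}

/*
 * Registered from the application with:
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_IPSEC,
 *                                 example_ipsec_event_cb, NULL);
 */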
+
+/**
* The eth device event type for interrupt, and maybe others in the future.
*/
enum rte_eth_event_type {
@@ -2452,6 +2579,7 @@ enum rte_eth_event_type {
RTE_ETH_EVENT_INTR_RMV, /**< device removal event */
RTE_ETH_EVENT_NEW, /**< port is probed */
RTE_ETH_EVENT_DESTROY, /**< port is released */
+ RTE_ETH_EVENT_IPSEC, /**< IPsec offload related event */
RTE_ETH_EVENT_MAX /**< max value of this enum */
};
@@ -3004,6 +3132,8 @@ int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
int rte_eth_dev_get_dcb_info(uint16_t port_id,
struct rte_eth_dcb_info *dcb_info);
+struct rte_eth_rxtx_callback;
+
/**
* Add a callback to be called on packet RX on a given port and queue.
*
@@ -3028,7 +3158,8 @@ int rte_eth_dev_get_dcb_info(uint16_t port_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
+const struct rte_eth_rxtx_callback *
+rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param);
/**
@@ -3056,7 +3187,8 @@ void *rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
+const struct rte_eth_rxtx_callback *
+rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param);
/**
@@ -3083,11 +3215,10 @@ void *rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
+const struct rte_eth_rxtx_callback *
+rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
rte_tx_callback_fn fn, void *user_param);
-struct rte_eth_rxtx_callback;
-
/**
* Remove an RX packet callback from a given port and queue.
*
@@ -3119,7 +3250,7 @@ struct rte_eth_rxtx_callback;
* is NULL or not found for the port/queue.
*/
int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
- struct rte_eth_rxtx_callback *user_cb);
+ const struct rte_eth_rxtx_callback *user_cb);
/**
* Remove a TX packet callback from a given port and queue.
@@ -3152,7 +3283,7 @@ int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
* is NULL or not found for the port/queue.
*/
int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
- struct rte_eth_rxtx_callback *user_cb);
+ const struct rte_eth_rxtx_callback *user_cb);
/**
* Retrieve information about given port's RX queue.
@@ -3262,6 +3393,49 @@ int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info);
int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Retrieve the type and size of plugin module EEPROM
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param modinfo
+ * The type and size of plugin module EEPROM.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ * - others depends on the specific operations implementation.
+ */
+int __rte_experimental
+rte_eth_dev_get_module_info(uint16_t port_id,
+ struct rte_eth_dev_module_info *modinfo);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Retrieve the data of plugin module EEPROM
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param info
+ * The template includes the plugin module EEPROM attributes, and the
+ * buffer for return plugin module EEPROM data.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EIO) if device is removed.
+ * - others depends on the specific operations implementation.
+ */
+int __rte_experimental
+rte_eth_dev_get_module_eeprom(uint16_t port_id,
+ struct rte_dev_eeprom_info *info);
+
+/**
* Set the list of multicast addresses to filter on an Ethernet device.
*
* @param port_id
@@ -3455,8 +3629,11 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
uint8_t en);
/**
-* Get the port id from pci address or device name
-* Ex: 0000:2:00.0 or vdev name net_pcap0
+* Get the port id from the device name. The device name should be specified
+* in one of the following forms:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:2:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev DPDK name, for example: net_[pcap0|null0|tap0]
*
* @param name
* pci address or name of the device
@@ -3470,12 +3647,15 @@ int
rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id);
/**
-* Get the device name from port id
+* Get the device name from a port id. The device name is specified in one of
+* the following forms:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:02:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev DPDK name, for example: net_[pcap0|null0|tun0|tap0]
*
* @param port_id
-* pointer to port identifier of the device
+* Port identifier of the device.
* @param name
-* pci address or name of the device
+* Buffer of size RTE_ETH_NAME_MAX_LEN to store the name.
* @return
* - (0) if successful.
* - (-EINVAL) on failure.
@@ -3530,7 +3710,7 @@ rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool);
* - pointer to security context on success.
*/
void *
-rte_eth_dev_get_sec_ctx(uint8_t port_id);
+rte_eth_dev_get_sec_ctx(uint16_t port_id);
#include <rte_ethdev_core.h>
@@ -3622,23 +3802,25 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ uint16_t nb_rx;
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return 0;
}
#endif
- int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
- rx_pkts, nb_pkts);
+ nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
+ rx_pkts, nb_pkts);
#ifdef RTE_ETHDEV_RXTX_CALLBACKS
- struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+ if (unlikely(dev->post_rx_burst_cbs[queue_id] != NULL)) {
+ struct rte_eth_rxtx_callback *cb =
+ dev->post_rx_burst_cbs[queue_id];
- if (unlikely(cb != NULL)) {
do {
nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx,
nb_pkts, cb->param);
@@ -3673,7 +3855,7 @@ rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id)
if (queue_id >= dev->data->nb_rx_queues)
return -EINVAL;
- return (*dev->dev_ops->rx_queue_count)(dev, queue_id);
+ return (int)(*dev->dev_ops->rx_queue_count)(dev, queue_id);
}
/**
@@ -3860,7 +4042,10 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
*
* If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can
* invoke this function concurrently on the same tx queue without SW lock.
- * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
+ * @see rte_eth_dev_info_get, struct rte_eth_txconf::offloads
+ *
+ * @see rte_eth_tx_prepare to perform some prior checks or adjustments
+ * for offloads.
*
* @param port_id
* The port identifier of the Ethernet device.
@@ -3890,7 +4075,7 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return 0;
}
#endif
@@ -3976,7 +4161,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (!rte_eth_dev_is_valid_port(port_id)) {
- RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
rte_errno = -EINVAL;
return 0;
}
@@ -3986,7 +4171,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
rte_errno = -EINVAL;
return 0;
}
@@ -4058,8 +4243,9 @@ rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id,
/* All packets sent, or to be dealt with by callback below */
if (unlikely(sent != to_send))
- buffer->error_callback(&buffer->pkts[sent], to_send - sent,
- buffer->error_userdata);
+ buffer->error_callback(&buffer->pkts[sent],
+ (uint16_t)(to_send - sent),
+ buffer->error_userdata);
return sent;
}
diff --git a/lib/librte_ether/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index e5681e46..33d12b3a 100644
--- a/lib/librte_ether/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -255,7 +255,7 @@ typedef int (*eth_mac_addr_add_t)(struct rte_eth_dev *dev,
uint32_t vmdq);
/**< @internal Set a MAC address into Receive Address Address Register */
-typedef void (*eth_mac_addr_set_t)(struct rte_eth_dev *dev,
+typedef int (*eth_mac_addr_set_t)(struct rte_eth_dev *dev,
struct ether_addr *mac_addr);
/**< @internal Set a MAC address into Receive Address Address Register */
@@ -337,6 +337,14 @@ typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev,
struct rte_dev_eeprom_info *info);
/**< @internal Program eeprom data */
+typedef int (*eth_get_module_info_t)(struct rte_eth_dev *dev,
+ struct rte_eth_dev_module_info *modinfo);
+/**< @internal Retrieve type and size of plugin module eeprom */
+
+typedef int (*eth_get_module_eeprom_t)(struct rte_eth_dev *dev,
+ struct rte_dev_eeprom_info *info);
+/**< @internal Retrieve plugin module eeprom data */
+
typedef int (*eth_l2_tunnel_eth_type_conf_t)
(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel);
/**< @internal config l2 tunnel ether type */
@@ -467,6 +475,10 @@ struct eth_dev_ops {
eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */
eth_set_eeprom_t set_eeprom; /**< Set eeprom. */
+ eth_get_module_info_t get_module_info;
+ /** Get plugin module eeprom attribute. */
+ eth_get_module_eeprom_t get_module_eeprom;
+ /** Get plugin module eeprom data. */
eth_filter_ctrl_t filter_ctrl; /**< common filter control. */
diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
new file mode 100644
index 00000000..c6d9bc1a
--- /dev/null
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#ifndef _RTE_ETHDEV_DRIVER_H_
+#define _RTE_ETHDEV_DRIVER_H_
+
+/**
+ * @file
+ *
+ * RTE Ethernet Device PMD API
+ *
+ * These APIs are for use by Ethernet drivers; user applications should not
+ * use them.
+ *
+ */
+
+#include <rte_ethdev.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @internal
+ * Returns an ethdev slot specified by the unique identifier name.
+ *
+ * @param name
+ *  The pointer to the unique identifier name for each Ethernet device.
+ * @return
+ *   - The pointer to the ethdev slot on success. NULL on error.
+ */
+struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
+
+/**
+ * @internal
+ * Allocates a new ethdev slot for an ethernet device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param name Unique identifier name for each Ethernet device
+ * @return
+ *   - Slot in the rte_eth_devices array for a new device;
+ */
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
+
+/**
+ * @internal
+ * Attach to the ethdev already initialized by the primary
+ * process.
+ *
+ * @param name Ethernet device's name.
+ * @return
+ *   - Success: Slot in the rte_eth_devices array for the attached
+ *     device.
+ * - Error: Null pointer.
+ */
+struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name);
+
+/**
+ * @internal
+ * Release the specified ethdev port.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @return
+ * - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
+ * Release device queues and clear its configuration to force the user
+ * application to reconfigure it. It is for internal use only.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ *
+ * @return
+ * void
+ */
+void _rte_eth_dev_reset(struct rte_eth_dev *dev);
+
+/**
+ * @internal Executes all the user application registered callbacks for
+ * the specific device. It is for DPDK internal user only. User
+ * application should not call it directly.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ * @param event
+ * Eth device interrupt event type.
+ * @param ret_param
+ * To pass data back to user application.
+ * This allows the user application to decide if a particular function
+ * is permitted or not.
+ *
+ * @return
+ * int
+ */
+int _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
+ enum rte_eth_event_type event, void *ret_param);
+
+/**
+ * @internal
+ * This is the last step of device probing.
+ * It must be called after a port is allocated and initialized successfully.
+ *
+ * The notification RTE_ETH_EVENT_NEW is sent to other entities
+ * (libraries and applications).
+ * The state is set as RTE_ETH_DEV_ATTACHED.
+ *
+ * @param dev
+ * New ethdev port.
+ */
+void rte_eth_dev_probing_finish(struct rte_eth_dev *dev);
+
+/**
+ * Create a memzone for HW rings.
+ * malloc() cannot be used because the physical address is needed.
+ * If the memzone has already been created, this function returns a pointer
+ * to the existing one.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ * @param name
+ * The name of the memory zone
+ * @param queue_id
+ * The index of the queue to add to name
+ * @param size
+ *  The size of the memory area
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in case of NUMA.
+ */
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
+ uint16_t queue_id, size_t size,
+ unsigned align, int socket_id);
+
+/**
+ * @internal
+ * Atomically set the link status for the specific device.
+ * It is for use by DPDK device drivers only.
+ * User applications should not call it.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ * @param new_link
+ *  New link status value.
+ * @return
+ *  Same convention as the eth_link_update operation:
+ *  0 if the link status has changed,
+ *  -1 if the link status was unchanged.
+ */
+static inline int
+rte_eth_linkstatus_set(struct rte_eth_dev *dev,
+ const struct rte_eth_link *new_link)
+{
+ volatile uint64_t *dev_link
+ = (volatile uint64_t *)&(dev->data->dev_link);
+ union {
+ uint64_t val64;
+ struct rte_eth_link link;
+ } orig;
+
+ RTE_BUILD_BUG_ON(sizeof(*new_link) != sizeof(uint64_t));
+
+ orig.val64 = rte_atomic64_exchange(dev_link,
+ *(const uint64_t *)new_link);
+
+ return (orig.link.link_status == new_link->link_status) ? -1 : 0;
+}
+
+/**
+ * @internal
+ * Atomically get the link speed and status.
+ *
+ * @param dev
+ * Pointer to struct rte_eth_dev.
+ * @param link
+ * link status value.
+ */
+static inline void
+rte_eth_linkstatus_get(const struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
+{
+ volatile uint64_t *src = (uint64_t *)&(dev->data->dev_link);
+ uint64_t *dst = (uint64_t *)link;
+
+ RTE_BUILD_BUG_ON(sizeof(*link) != sizeof(uint64_t));
+
+#ifdef __LP64__
+	/* if cpu arch has 64 bit unsigned long then implicitly atomic */
+ *dst = *src;
+#else
+ /* can't use rte_atomic64_read because it returns signed int */
+ do {
+ *dst = *src;
+ } while (!rte_atomic64_cmpset(src, *dst, *dst));
+#endif
+}
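/*
 * A minimal usage sketch (illustrative, not part of the patch): a PMD
 * link_update callback built on the helpers above; the link field values
 * are purely illustrative.
 */
static int
example_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct rte_eth_link link = {
		.link_speed = ETH_SPEED_NUM_10G,
		.link_duplex = ETH_LINK_FULL_DUPLEX,
		.link_autoneg = ETH_LINK_AUTONEG,
		.link_status = ETH_LINK_UP,
	};

	RTE_SET_USED(wait_to_complete);
	/* 0 if the status changed, -1 otherwise, matching eth_link_update */
	return rte_eth_linkstatus_set(dev, &link);
}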
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Allocate a unique switch domain identifier.
+ *
+ * A pool of switch domain identifiers which can be allocated on request. This
+ * will enable devices which support the concept of switch domains to request
+ * a switch domain id which is guaranteed to be unique from other devices
+ * running in the same process.
+ *
+ * @param domain_id
+ * switch domain identifier parameter to pass back to application
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ */
+int __rte_experimental
+rte_eth_switch_domain_alloc(uint16_t *domain_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Free switch domain.
+ *
+ * Return a switch domain identifier to the pool of free identifiers after it
+ * is no longer in use by a device.
+ *
+ * @param domain_id
+ * switch domain identifier to free
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ */
+int __rte_experimental
+rte_eth_switch_domain_free(uint16_t domain_id);
+
+/** Generic Ethernet device arguments */
+struct rte_eth_devargs {
+ uint16_t ports[RTE_MAX_ETHPORTS];
+	/** Port number(s) to enable on a multi-port single function */
+	uint16_t nb_ports;
+	/** Number of ports in the ports field */
+	uint16_t representor_ports[RTE_MAX_ETHPORTS];
+	/** Representor port identifier(s) to enable on the device */
+	uint16_t nb_representor_ports;
+	/** Number of ports in the representor_ports field */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * PMD helper function to parse ethdev arguments
+ *
+ * @param devargs
+ * device arguments
+ * @param eth_devargs
+ * parsed ethdev specific arguments.
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ */
+int __rte_experimental
+rte_eth_devargs_parse(const char *devargs, struct rte_eth_devargs *eth_devargs);
+
+
+typedef int (*ethdev_init_t)(struct rte_eth_dev *ethdev, void *init_params);
+typedef int (*ethdev_bus_specific_init)(struct rte_eth_dev *ethdev,
+ void *bus_specific_init_params);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * PMD helper function for the creation of new ethdev ports.
+ *
+ * @param device
+ * rte_device handle.
+ * @param name
+ * port name.
+ * @param priv_data_size
+ * size of private data required for port.
+ * @param bus_specific_init
+ * port bus specific initialisation callback function
+ * @param bus_init_params
+ * port bus specific initialisation parameters
+ * @param ethdev_init
+ * device specific port initialization callback function
+ * @param init_params
+ * port initialisation parameters
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ */
+int __rte_experimental
+rte_eth_dev_create(struct rte_device *device, const char *name,
+ size_t priv_data_size,
+ ethdev_bus_specific_init bus_specific_init, void *bus_init_params,
+ ethdev_init_t ethdev_init, void *init_params);
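/*
 * A minimal usage sketch (illustrative, not part of the patch): a vdev PMD
 * probe built on the helper above. The example_ names are hypothetical, and
 * struct rte_vdev_device / rte_vdev_device_name() come from rte_bus_vdev.h.
 * No bus-specific init is needed for vdev devices, so NULL is passed for
 * that callback and its parameter.
 */
struct example_priv {
	uint64_t rx_pkts; /* hypothetical per-port private state */
};

static int
example_ethdev_init(struct rte_eth_dev *ethdev, void *init_params)
{
	struct example_priv *priv = ethdev->data->dev_private;

	RTE_SET_USED(init_params);
	priv->rx_pkts = 0;
	/* assign ethdev->dev_ops, Rx/Tx burst functions, MAC address, ... */
	return 0;
}

static int
example_vdev_probe(struct rte_vdev_device *vdev)
{
	return rte_eth_dev_create(&vdev->device, rte_vdev_device_name(vdev),
				  sizeof(struct example_priv),
				  NULL, NULL,
				  example_ethdev_init, NULL);
}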
+
+
+typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * PMD helper function for cleaning up the resources of an ethdev port on its
+ * destruction.
+ *
+ * @param ethdev
+ * ethdev handle of port.
+ * @param ethdev_uninit
+ * device specific port un-initialise callback function
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ */
+int __rte_experimental
+rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit);
+
+/**
+ * PMD helper function to check if keeping CRC is requested
+ *
+ * @note
+ * When the CRC_STRIP offload flag is removed and the default behavior
+ * switches to stripping the CRC, as planned, this helper function will no
+ * longer be useful and will be removed. In PMDs this function will be
+ * replaced with the check:
+ * if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+ *
+ * @param rx_offloads
+ * offload bits to be applied
+ *
+ * @return
+ * Return positive if keeping CRC is requested,
+ * zero if stripping CRC is requested
+ */
+static inline int
+rte_eth_dev_must_keep_crc(uint64_t rx_offloads)
+{
+ if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
+ return 0;
+
+ /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */
+ return 1;
+}
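/*
 * A minimal usage sketch (illustrative, not part of the patch): typical use
 * in a PMD configure path; the example_ function name is hypothetical.
 */
static int
example_hw_strips_crc(struct rte_eth_dev *dev)
{
	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;

	/* Returns 1 when the hardware should strip the FCS, 0 to keep it. */
	return !rte_eth_dev_must_keep_crc(rx_offloads);
}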
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_DRIVER_H_ */
diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h
index 897ce5b4..f652596f 100644
--- a/lib/librte_ether/rte_ethdev_pci.h
+++ b/lib/librte_ethdev/rte_ethdev_pci.h
@@ -53,8 +53,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
struct rte_pci_device *pci_dev)
{
if ((eth_dev == NULL) || (pci_dev == NULL)) {
- RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n",
- eth_dev, pci_dev);
+ RTE_ETHDEV_LOG(ERR, "NULL pointer eth_dev=%p pci_dev=%p",
+ (void *)eth_dev, (void *)pci_dev);
return;
}
@@ -70,6 +70,18 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
eth_dev->data->numa_node = pci_dev->device.numa_node;
}
+static inline int
+eth_dev_pci_specific_init(struct rte_eth_dev *eth_dev, void *bus_device) {
+ struct rte_pci_device *pci_dev = bus_device;
+
+ if (!pci_dev)
+ return -ENODEV;
+
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+ return 0;
+}
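/*
 * A minimal usage sketch (illustrative, not part of the patch): the intended
 * pairing of this helper with rte_eth_dev_create() from rte_ethdev_driver.h.
 * The example_priv structure and example_ethdev_init callback are
 * hypothetical PMD-specific pieces.
 */
static int
example_pci_probe(struct rte_pci_driver *pci_drv,
		  struct rte_pci_device *pci_dev)
{
	RTE_SET_USED(pci_drv);
	return rte_eth_dev_create(&pci_dev->device, pci_dev->device.name,
				  sizeof(struct example_priv),
				  eth_dev_pci_specific_init, pci_dev,
				  example_ethdev_init, NULL);
}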
+
/**
* @internal
* Allocates a new ethdev slot for an ethernet device and returns the pointer
@@ -123,9 +135,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
static inline void
rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
{
- /* free ether device */
- rte_eth_dev_release_port(eth_dev);
-
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
rte_free(eth_dev->data->dev_private);
@@ -139,6 +148,9 @@ rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
eth_dev->device = NULL;
eth_dev->intr_handle = NULL;
+
+ /* free ether device */
+ rte_eth_dev_release_port(eth_dev);
}
typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev);
@@ -163,6 +175,8 @@ rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
ret = dev_init(eth_dev);
if (ret)
rte_eth_dev_pci_release(eth_dev);
+ else
+ rte_eth_dev_probing_finish(eth_dev);
return ret;
}
diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ethdev/rte_ethdev_vdev.h
index 259feda3..259feda3 100644
--- a/lib/librte_ether/rte_ethdev_vdev.h
+++ b/lib/librte_ethdev/rte_ethdev_vdev.h
diff --git a/lib/librte_ether/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
index 87f02fb7..38f117f0 100644
--- a/lib/librte_ether/rte_ethdev_version.map
+++ b/lib/librte_ethdev/rte_ethdev_version.map
@@ -16,7 +16,6 @@ DPDK_2.2 {
rte_eth_dev_count;
rte_eth_dev_default_mac_addr_set;
rte_eth_dev_detach;
- rte_eth_dev_filter_ctrl;
rte_eth_dev_filter_supported;
rte_eth_dev_flow_ctrl_get;
rte_eth_dev_flow_ctrl_set;
@@ -128,11 +127,6 @@ DPDK_17.02 {
_rte_eth_dev_reset;
rte_eth_dev_fw_version_get;
- rte_flow_create;
- rte_flow_destroy;
- rte_flow_flush;
- rte_flow_query;
- rte_flow_validate;
} DPDK_16.07;
@@ -153,9 +147,8 @@ DPDK_17.08 {
_rte_eth_dev_callback_process;
rte_eth_dev_adjust_nb_rx_tx_desc;
- rte_flow_copy;
- rte_flow_isolate;
rte_tm_capabilities_get;
+ rte_tm_get_number_of_leaf_nodes;
rte_tm_hierarchy_commit;
rte_tm_level_capabilities_get;
rte_tm_mark_ip_dscp;
@@ -193,7 +186,6 @@ DPDK_17.11 {
rte_eth_dev_get_sec_ctx;
rte_eth_dev_pool_ops_supported;
rte_eth_dev_reset;
- rte_flow_error_set;
} DPDK_17.08;
@@ -204,9 +196,39 @@ DPDK_18.02 {
} DPDK_17.11;
+DPDK_18.05 {
+ global:
+
+ rte_eth_dev_count_avail;
+ rte_eth_dev_probing_finish;
+ rte_eth_find_next_owned_by;
+ rte_flow_copy;
+ rte_flow_create;
+ rte_flow_destroy;
+ rte_flow_error_set;
+ rte_flow_flush;
+ rte_flow_isolate;
+ rte_flow_query;
+ rte_flow_validate;
+
+} DPDK_18.02;
+
+DPDK_18.08 {
+ global:
+
+ rte_eth_dev_logtype;
+
+} DPDK_18.05;
+
EXPERIMENTAL {
global:
+ rte_eth_devargs_parse;
+ rte_eth_dev_count_total;
+ rte_eth_dev_create;
+ rte_eth_dev_destroy;
+ rte_eth_dev_get_module_eeprom;
+ rte_eth_dev_get_module_info;
rte_eth_dev_is_removed;
rte_eth_dev_owner_delete;
rte_eth_dev_owner_get;
@@ -215,7 +237,9 @@ EXPERIMENTAL {
rte_eth_dev_owner_unset;
rte_eth_dev_rx_offload_name;
rte_eth_dev_tx_offload_name;
- rte_eth_find_next_owned_by;
+ rte_eth_switch_domain_alloc;
+ rte_eth_switch_domain_free;
+ rte_flow_expand_rss;
rte_mtr_capabilities_get;
rte_mtr_create;
rte_mtr_destroy;
@@ -228,5 +252,4 @@ EXPERIMENTAL {
rte_mtr_policer_actions_update;
rte_mtr_stats_read;
rte_mtr_stats_update;
-
-} DPDK_17.11;
+};
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index 38f2d27b..cff4b520 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2016 6WIND S.A.
- * Copyright 2016 Mellanox.
+ * Copyright 2016 Mellanox Technologies, Ltd
*/
#include <errno.h>
@@ -38,8 +38,9 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = {
MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
MK_FLOW_ITEM(PF, 0),
MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
- MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
- MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
+ MK_FLOW_ITEM(PHY_PORT, sizeof(struct rte_flow_item_phy_port)),
+ MK_FLOW_ITEM(PORT_ID, sizeof(struct rte_flow_item_port_id)),
+ MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)),
MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
@@ -54,6 +55,17 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = {
MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)),
+ MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)),
+ MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)),
+ MK_FLOW_ITEM(IPV6_EXT, sizeof(struct rte_flow_item_ipv6_ext)),
+ MK_FLOW_ITEM(ICMP6, sizeof(struct rte_flow_item_icmp6)),
+ MK_FLOW_ITEM(ICMP6_ND_NS, sizeof(struct rte_flow_item_icmp6_nd_ns)),
+ MK_FLOW_ITEM(ICMP6_ND_NA, sizeof(struct rte_flow_item_icmp6_nd_na)),
+ MK_FLOW_ITEM(ICMP6_ND_OPT, sizeof(struct rte_flow_item_icmp6_nd_opt)),
+ MK_FLOW_ITEM(ICMP6_ND_OPT_SLA_ETH,
+ sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)),
+ MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH,
+ sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)),
};
/** Generate flow_action[] entry. */
@@ -72,11 +84,31 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
MK_FLOW_ACTION(FLAG, 0),
MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
MK_FLOW_ACTION(DROP, 0),
- MK_FLOW_ACTION(COUNT, 0),
- MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
- MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
+ MK_FLOW_ACTION(COUNT, sizeof(struct rte_flow_action_count)),
+ MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)),
MK_FLOW_ACTION(PF, 0),
MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
+ MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)),
+ MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)),
+ MK_FLOW_ACTION(OF_SET_MPLS_TTL,
+ sizeof(struct rte_flow_action_of_set_mpls_ttl)),
+ MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0),
+ MK_FLOW_ACTION(OF_SET_NW_TTL,
+ sizeof(struct rte_flow_action_of_set_nw_ttl)),
+ MK_FLOW_ACTION(OF_DEC_NW_TTL, 0),
+ MK_FLOW_ACTION(OF_COPY_TTL_OUT, 0),
+ MK_FLOW_ACTION(OF_COPY_TTL_IN, 0),
+ MK_FLOW_ACTION(OF_POP_VLAN, 0),
+ MK_FLOW_ACTION(OF_PUSH_VLAN,
+ sizeof(struct rte_flow_action_of_push_vlan)),
+ MK_FLOW_ACTION(OF_SET_VLAN_VID,
+ sizeof(struct rte_flow_action_of_set_vlan_vid)),
+ MK_FLOW_ACTION(OF_SET_VLAN_PCP,
+ sizeof(struct rte_flow_action_of_set_vlan_pcp)),
+ MK_FLOW_ACTION(OF_POP_MPLS,
+ sizeof(struct rte_flow_action_of_pop_mpls)),
+ MK_FLOW_ACTION(OF_PUSH_MPLS,
+ sizeof(struct rte_flow_action_of_push_mpls)),
};
static int
@@ -201,7 +233,7 @@ rte_flow_flush(uint16_t port_id,
int
rte_flow_query(uint16_t port_id,
struct rte_flow *flow,
- enum rte_flow_action_type action,
+ const struct rte_flow_action *action,
void *data,
struct rte_flow_error *error)
{
@@ -255,60 +287,136 @@ rte_flow_error_set(struct rte_flow_error *error,
return -code;
}
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
- size_t *size, size_t *pad)
+/** Pattern item specification types. */
+enum item_spec_type {
+ ITEM_SPEC,
+ ITEM_LAST,
+ ITEM_MASK,
+};
+
+/** Compute storage space needed by item specification and copy it. */
+static size_t
+flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
+ enum item_spec_type type)
{
- if (!item->spec) {
- *size = 0;
+ size_t size = 0;
+ const void *data =
+ type == ITEM_SPEC ? item->spec :
+ type == ITEM_LAST ? item->last :
+ type == ITEM_MASK ? item->mask :
+ NULL;
+
+ if (!item->spec || !data)
goto empty;
- }
switch (item->type) {
union {
const struct rte_flow_item_raw *raw;
} spec;
+ union {
+ const struct rte_flow_item_raw *raw;
+ } last;
+ union {
+ const struct rte_flow_item_raw *raw;
+ } mask;
+ union {
+ const struct rte_flow_item_raw *raw;
+ } src;
+ union {
+ struct rte_flow_item_raw *raw;
+ } dst;
+ size_t off;
- /* Not a fall-through */
case RTE_FLOW_ITEM_TYPE_RAW:
spec.raw = item->spec;
- *size = offsetof(struct rte_flow_item_raw, pattern) +
- spec.raw->length * sizeof(*spec.raw->pattern);
+ last.raw = item->last ? item->last : item->spec;
+ mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask;
+ src.raw = data;
+ dst.raw = buf;
+ off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw),
+ sizeof(*src.raw->pattern));
+ if (type == ITEM_SPEC ||
+ (type == ITEM_MASK &&
+ ((spec.raw->length & mask.raw->length) >=
+ (last.raw->length & mask.raw->length))))
+ size = spec.raw->length & mask.raw->length;
+ else
+ size = last.raw->length & mask.raw->length;
+ size = off + size * sizeof(*src.raw->pattern);
+ if (dst.raw) {
+ memcpy(dst.raw, src.raw, sizeof(*src.raw));
+ dst.raw->pattern = memcpy((uint8_t *)dst.raw + off,
+ src.raw->pattern,
+ size - off);
+ }
break;
default:
- *size = rte_flow_desc_item[item->type].size;
+ size = rte_flow_desc_item[item->type].size;
+ if (buf)
+ memcpy(buf, data, size);
break;
}
empty:
- *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+ return RTE_ALIGN_CEIL(size, sizeof(double));
}
-/** Compute storage space needed by action configuration. */
-static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
+/** Compute storage space needed by action configuration and copy it. */
+static size_t
+flow_action_conf_copy(void *buf, const struct rte_flow_action *action)
{
- if (!action->conf) {
- *size = 0;
+ size_t size = 0;
+
+ if (!action->conf)
goto empty;
- }
switch (action->type) {
union {
const struct rte_flow_action_rss *rss;
- } conf;
+ } src;
+ union {
+ struct rte_flow_action_rss *rss;
+ } dst;
+ size_t off;
- /* Not a fall-through. */
case RTE_FLOW_ACTION_TYPE_RSS:
- conf.rss = action->conf;
- *size = offsetof(struct rte_flow_action_rss, queue) +
- conf.rss->num * sizeof(*conf.rss->queue);
+ src.rss = action->conf;
+ dst.rss = buf;
+ off = 0;
+ if (dst.rss)
+ *dst.rss = (struct rte_flow_action_rss){
+ .func = src.rss->func,
+ .level = src.rss->level,
+ .types = src.rss->types,
+ .key_len = src.rss->key_len,
+ .queue_num = src.rss->queue_num,
+ };
+ off += sizeof(*src.rss);
+ if (src.rss->key_len) {
+ off = RTE_ALIGN_CEIL(off, sizeof(double));
+ size = sizeof(*src.rss->key) * src.rss->key_len;
+ if (dst.rss)
+ dst.rss->key = memcpy
+ ((void *)((uintptr_t)dst.rss + off),
+ src.rss->key, size);
+ off += size;
+ }
+ if (src.rss->queue_num) {
+ off = RTE_ALIGN_CEIL(off, sizeof(double));
+ size = sizeof(*src.rss->queue) * src.rss->queue_num;
+ if (dst.rss)
+ dst.rss->queue = memcpy
+ ((void *)((uintptr_t)dst.rss + off),
+ src.rss->queue, size);
+ off += size;
+ }
+ size = off;
break;
default:
- *size = rte_flow_desc_action[action->type].size;
+ size = rte_flow_desc_action[action->type].size;
+ if (buf)
+ memcpy(buf, action->conf, size);
break;
}
empty:
- *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+ return RTE_ALIGN_CEIL(size, sizeof(double));
}
/** Store a full rte_flow description. */
@@ -320,7 +428,6 @@ rte_flow_copy(struct rte_flow_desc *desc, size_t len,
{
struct rte_flow_desc *fd = NULL;
size_t tmp;
- size_t pad;
size_t off1 = 0;
size_t off2 = 0;
size_t size = 0;
@@ -345,24 +452,26 @@ store:
dst = memcpy(fd->data + off1, item,
sizeof(*item));
off1 += sizeof(*item);
- flow_item_spec_size(item, &tmp, &pad);
if (item->spec) {
if (fd)
- dst->spec = memcpy(fd->data + off2,
- item->spec, tmp);
- off2 += tmp + pad;
+ dst->spec = fd->data + off2;
+ off2 += flow_item_spec_copy
+ (fd ? fd->data + off2 : NULL, item,
+ ITEM_SPEC);
}
if (item->last) {
if (fd)
- dst->last = memcpy(fd->data + off2,
- item->last, tmp);
- off2 += tmp + pad;
+ dst->last = fd->data + off2;
+ off2 += flow_item_spec_copy
+ (fd ? fd->data + off2 : NULL, item,
+ ITEM_LAST);
}
if (item->mask) {
if (fd)
- dst->mask = memcpy(fd->data + off2,
- item->mask, tmp);
- off2 += tmp + pad;
+ dst->mask = fd->data + off2;
+ off2 += flow_item_spec_copy
+ (fd ? fd->data + off2 : NULL, item,
+ ITEM_MASK);
}
off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
} while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
@@ -387,12 +496,11 @@ store:
dst = memcpy(fd->data + off1, action,
sizeof(*action));
off1 += sizeof(*action);
- flow_action_conf_size(action, &tmp, &pad);
if (action->conf) {
if (fd)
- dst->conf = memcpy(fd->data + off2,
- action->conf, tmp);
- off2 += tmp + pad;
+ dst->conf = fd->data + off2;
+ off2 += flow_action_conf_copy
+ (fd ? fd->data + off2 : NULL, action);
}
off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
} while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
@@ -418,3 +526,110 @@ store:
}
return 0;
}
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index)
+{
+ const int elt_n = 8;
+ const struct rte_flow_item *item;
+ const struct rte_flow_expand_node *node = &graph[graph_root_index];
+ const int *next_node;
+ const int *stack[elt_n];
+ int stack_pos = 0;
+ struct rte_flow_item flow_items[elt_n];
+ unsigned int i;
+ size_t lsize;
+ size_t user_pattern_size = 0;
+ void *addr = NULL;
+
+ lsize = offsetof(struct rte_flow_expand_rss, entry) +
+ elt_n * sizeof(buf->entry[0]);
+ if (lsize <= size) {
+ buf->entry[0].priority = 0;
+ buf->entry[0].pattern = (void *)&buf->entry[elt_n];
+ buf->entries = 0;
+ addr = buf->entry[0].pattern;
+ }
+ for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+ const struct rte_flow_expand_node *next = NULL;
+
+ for (i = 0; node->next && node->next[i]; ++i) {
+ next = &graph[node->next[i]];
+ if (next->type == item->type)
+ break;
+ }
+ if (next)
+ node = next;
+ user_pattern_size += sizeof(*item);
+ }
+ user_pattern_size += sizeof(*item); /* Handle END item. */
+ lsize += user_pattern_size;
+ /* Copy the user pattern in the first entry of the buffer. */
+ if (lsize <= size) {
+ rte_memcpy(addr, pattern, user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) + user_pattern_size);
+ buf->entries = 1;
+ }
+ /* Start expanding. */
+ memset(flow_items, 0, sizeof(flow_items));
+ user_pattern_size -= sizeof(*item);
+ next_node = node->next;
+ stack[stack_pos] = next_node;
+ node = next_node ? &graph[*next_node] : NULL;
+ while (node) {
+ flow_items[stack_pos].type = node->type;
+ if (node->rss_types & types) {
+ /*
+ * compute the number of items to copy from the
+ * expansion and copy it.
+			 * When stack_pos is 0, there is 1 element in it,
+			 * plus the additional END item.
+ */
+ int elt = stack_pos + 2;
+
+ flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
+ lsize += elt * sizeof(*item) + user_pattern_size;
+ if (lsize <= size) {
+ size_t n = elt * sizeof(*item);
+
+ buf->entry[buf->entries].priority =
+ stack_pos + 1;
+ buf->entry[buf->entries].pattern = addr;
+ buf->entries++;
+ rte_memcpy(addr, buf->entry[0].pattern,
+ user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) +
+ user_pattern_size);
+ rte_memcpy(addr, flow_items, n);
+ addr = (void *)(((uintptr_t)addr) + n);
+ }
+ }
+ /* Go deeper. */
+ if (node->next) {
+ next_node = node->next;
+ if (stack_pos++ == elt_n) {
+ rte_errno = E2BIG;
+ return -rte_errno;
+ }
+ stack[stack_pos] = next_node;
+ } else if (*(next_node + 1)) {
+ /* Follow up with the next possibility. */
+ ++next_node;
+ } else {
+ /* Move to the next path. */
+ if (stack_pos)
+ next_node = stack[--stack_pos];
+ next_node++;
+ stack[stack_pos] = next_node;
+ }
+ node = *next_node ? &graph[*next_node] : NULL;
+ };
+ return lsize;
+}
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ethdev/rte_flow.h
index 13e42021..f8ba71cd 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ethdev/rte_flow.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2016 6WIND S.A.
- * Copyright 2016 Mellanox.
+ * Copyright 2016 Mellanox Technologies, Ltd
*/
#ifndef RTE_FLOW_H_
@@ -14,8 +14,12 @@
* associated actions in hardware through flow rules.
*/
+#include <stddef.h>
+#include <stdint.h>
+
#include <rte_arp.h>
#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
#include <rte_icmp.h>
#include <rte_ip.h>
#include <rte_sctp.h>
@@ -31,18 +35,20 @@ extern "C" {
/**
* Flow rule attributes.
*
- * Priorities are set on two levels: per group and per rule within groups.
+ * Priorities are set on a per-rule basis within groups.
*
- * Lower values denote higher priority, the highest priority for both levels
- * is 0, so that a rule with priority 0 in group 8 is always matched after a
- * rule with priority 8 in group 0.
+ * Lower values denote higher priority; the highest priority for a flow rule
+ * is 0. When a flow matches more than one rule, the rule with the
+ * lowest priority value is always the one matched.
*
* Although optional, applications are encouraged to group similar rules as
* much as possible to fully take advantage of hardware capabilities
* (e.g. optimized matching) and work around limitations (e.g. a single
- * pattern type possibly allowed in a given group).
+ * pattern type possibly allowed in a given group). Applications should be
+ * aware that groups are not linked by default, and that they must be
+ * explicitly linked by the application using the JUMP action.
*
- * Group and priority levels are arbitrary and up to the application, they
+ * Priority levels are arbitrary and up to the application, they
* do not need to be contiguous nor start from 0, however the maximum number
* varies between devices and may be affected by existing flow rules.
*
@@ -65,10 +71,29 @@ extern "C" {
*/
struct rte_flow_attr {
uint32_t group; /**< Priority group. */
- uint32_t priority; /**< Priority level within group. */
+ uint32_t priority; /**< Rule priority level within group. */
uint32_t ingress:1; /**< Rule applies to ingress traffic. */
uint32_t egress:1; /**< Rule applies to egress traffic. */
- uint32_t reserved:30; /**< Reserved, must be zero. */
+ /**
+ * Instead of simply matching the properties of traffic as it would
+ * appear on a given DPDK port ID, enabling this attribute transfers
+ * a flow rule to the lowest possible level of any device endpoints
+ * found in the pattern.
+ *
+ * When supported, this effectively enables an application to
+ * re-route traffic not necessarily intended for it (e.g. coming
+ * from or addressed to different physical ports, VFs or
+ * applications) at the device level.
+ *
+ * It complements the behavior of some pattern items such as
+ * RTE_FLOW_ITEM_TYPE_PHY_PORT and is meaningless without them.
+ *
+ * When transferring flow rules, ingress and egress attributes keep
+ * their original meaning, as if processing traffic emitted or
+ * received by the application.
+ */
+ uint32_t transfer:1;
+ uint32_t reserved:29; /**< Reserved, must be zero. */
};
/**
@@ -76,15 +101,13 @@ struct rte_flow_attr {
*
* Pattern items fall in two categories:
*
- * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
- * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+ * - Matching protocol headers and packet data, usually associated with a
* specification structure. These must be stacked in the same order as the
- * protocol layers to match, starting from the lowest.
+ * protocol layers to match inside packets, starting from the lowest.
*
- * - Matching meta-data or affecting pattern processing (END, VOID, INVERT,
- * PF, VF, PORT and so on), often without a specification structure. Since
- * they do not match packet contents, these can be specified anywhere
- * within item lists without affecting others.
+ * - Matching meta-data or affecting pattern processing, often without a
+ * specification structure. Since they do not match packet contents, their
+ * position in the list is usually not relevant.
*
* See the description of individual types for more information. Those
* marked with [META] fall into the second category.
@@ -131,13 +154,8 @@ enum rte_flow_item_type {
/**
* [META]
*
- * Matches packets addressed to the physical function of the device.
- *
- * If the underlying device function differs from the one that would
- * normally receive the matched traffic, specifying this item
- * prevents it from reaching that device unless the flow rule
- * contains a PF action. Packets are not duplicated between device
- * instances by default.
+ * Matches traffic originating from (ingress) or going to (egress)
+ * the physical function of the current device.
*
* No associated specification structure.
*/
@@ -146,13 +164,8 @@ enum rte_flow_item_type {
/**
* [META]
*
- * Matches packets addressed to a virtual function ID of the device.
- *
- * If the underlying device function differs from the one that would
- * normally receive the matched traffic, specifying this item
- * prevents it from reaching that device unless the flow rule
- * contains a VF action. Packets are not duplicated between device
- * instances by default.
+ * Matches traffic originating from (ingress) or going to (egress) a
+ * given virtual function of the current device.
*
* See struct rte_flow_item_vf.
*/
@@ -161,17 +174,22 @@ enum rte_flow_item_type {
/**
* [META]
*
- * Matches packets coming from the specified physical port of the
- * underlying device.
+ * Matches traffic originating from (ingress) or going to (egress) a
+ * physical port of the underlying device.
*
- * The first PORT item overrides the physical port normally
- * associated with the specified DPDK input port (port_id). This
- * item can be provided several times to match additional physical
- * ports.
+ * See struct rte_flow_item_phy_port.
+ */
+ RTE_FLOW_ITEM_TYPE_PHY_PORT,
+
+ /**
+ * [META]
+ *
+ * Matches traffic originating from (ingress) or going to (egress) a
+ * given DPDK port ID.
*
- * See struct rte_flow_item_port.
+ * See struct rte_flow_item_port_id.
*/
- RTE_FLOW_ITEM_TYPE_PORT,
+ RTE_FLOW_ITEM_TYPE_PORT_ID,
/**
* Matches a byte string of a given length at a given offset.
@@ -323,6 +341,78 @@ enum rte_flow_item_type {
* See struct rte_flow_item_geneve.
*/
RTE_FLOW_ITEM_TYPE_GENEVE,
+
+ /**
+ * Matches a VXLAN-GPE header.
+ *
+ * See struct rte_flow_item_vxlan_gpe.
+ */
+ RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+
+ /**
+ * Matches an ARP header for Ethernet/IPv4.
+ *
+ * See struct rte_flow_item_arp_eth_ipv4.
+ */
+ RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4,
+
+ /**
+ * Matches the presence of any IPv6 extension header.
+ *
+ * See struct rte_flow_item_ipv6_ext.
+ */
+ RTE_FLOW_ITEM_TYPE_IPV6_EXT,
+
+ /**
+ * Matches any ICMPv6 header.
+ *
+ * See struct rte_flow_item_icmp6.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6,
+
+ /**
+ * Matches an ICMPv6 neighbor discovery solicitation.
+ *
+ * See struct rte_flow_item_icmp6_nd_ns.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS,
+
+ /**
+ * Matches an ICMPv6 neighbor discovery advertisement.
+ *
+ * See struct rte_flow_item_icmp6_nd_na.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA,
+
+ /**
+ * Matches the presence of any ICMPv6 neighbor discovery option.
+ *
+ * See struct rte_flow_item_icmp6_nd_opt.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT,
+
+ /**
+ * Matches an ICMPv6 neighbor discovery source Ethernet link-layer
+ * address option.
+ *
+ * See struct rte_flow_item_icmp6_nd_opt_sla_eth.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH,
+
+ /**
+ * Matches an ICMPv6 neighbor discovery target Ethernet link-layer
+ * address option.
+ *
+ * See struct rte_flow_item_icmp6_nd_opt_tla_eth.
+ */
+ RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH,
+
+ /**
+ * Matches specified mark field.
+ *
+ * See struct rte_flow_item_mark.
+ */
+ RTE_FLOW_ITEM_TYPE_MARK,
};
/**
@@ -350,15 +440,15 @@ static const struct rte_flow_item_any rte_flow_item_any_mask = {
/**
* RTE_FLOW_ITEM_TYPE_VF
*
- * Matches packets addressed to a virtual function ID of the device.
+ * Matches traffic originating from (ingress) or going to (egress) a given
+ * virtual function of the current device.
+ *
+ * If supported, should work even if the virtual function is not managed by
+ * the application and thus not associated with a DPDK port ID.
*
- * If the underlying device function differs from the one that would
- * normally receive the matched traffic, specifying this item prevents it
- * from reaching that device unless the flow rule contains a VF
- * action. Packets are not duplicated between device instances by default.
+ * Note this pattern item does not match VF representors traffic which, as
+ * separate entities, should be addressed through their own DPDK port IDs.
*
- * - Likely to return an error or never match any traffic if this causes a
- * VF device to match traffic addressed to a different VF.
* - Can be specified multiple times to match traffic addressed to several
* VF IDs.
* - Can be combined with a PF item to match both PF and VF traffic.
@@ -366,7 +456,7 @@ static const struct rte_flow_item_any rte_flow_item_any_mask = {
* A zeroed mask can be used to match any VF ID.
*/
struct rte_flow_item_vf {
- uint32_t id; /**< Destination VF ID. */
+ uint32_t id; /**< VF ID. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */
@@ -377,13 +467,13 @@ static const struct rte_flow_item_vf rte_flow_item_vf_mask = {
#endif
/**
- * RTE_FLOW_ITEM_TYPE_PORT
+ * RTE_FLOW_ITEM_TYPE_PHY_PORT
*
- * Matches packets coming from the specified physical port of the underlying
- * device.
+ * Matches traffic originating from (ingress) or going to (egress) a
+ * physical port of the underlying device.
*
- * The first PORT item overrides the physical port normally associated with
- * the specified DPDK input port (port_id). This item can be provided
+ * The first PHY_PORT item overrides the physical port normally associated
+ * with the specified DPDK input port (port_id). This item can be provided
* several times to match additional physical ports.
*
* Note that physical ports are not necessarily tied to DPDK input ports
@@ -396,18 +486,44 @@ static const struct rte_flow_item_vf rte_flow_item_vf_mask = {
*
* A zeroed mask can be used to match any port index.
*/
-struct rte_flow_item_port {
+struct rte_flow_item_phy_port {
uint32_t index; /**< Physical port index. */
};
-/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */
+/** Default mask for RTE_FLOW_ITEM_TYPE_PHY_PORT. */
#ifndef __cplusplus
-static const struct rte_flow_item_port rte_flow_item_port_mask = {
+static const struct rte_flow_item_phy_port rte_flow_item_phy_port_mask = {
.index = 0x00000000,
};
#endif
/**
+ * RTE_FLOW_ITEM_TYPE_PORT_ID
+ *
+ * Matches traffic originating from (ingress) or going to (egress) a given
+ * DPDK port ID.
+ *
+ * Normally only supported if the port ID in question is known by the
+ * underlying PMD and related to the device the flow rule is created
+ * against.
+ *
+ * This must not be confused with @p PHY_PORT which refers to the physical
+ * port of a device, whereas @p PORT_ID refers to a struct rte_eth_dev
+ * object on the application side (also known as "port representor"
+ * depending on the kind of underlying device).
+ */
+struct rte_flow_item_port_id {
+ uint32_t id; /**< DPDK port ID. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_PORT_ID. */
+#ifndef __cplusplus
+static const struct rte_flow_item_port_id rte_flow_item_port_id_mask = {
+ .id = 0xffffffff,
+};
+#endif
+
+/**
* RTE_FLOW_ITEM_TYPE_RAW
*
* Matches a byte string of a given length at a given offset.
@@ -432,7 +548,7 @@ struct rte_flow_item_raw {
int32_t offset; /**< Absolute or relative offset for pattern. */
uint16_t limit; /**< Search area limit for start of pattern. */
uint16_t length; /**< Pattern length. */
- uint8_t pattern[]; /**< Byte string to look for. */
+ const uint8_t *pattern; /**< Byte string to look for. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */
@@ -444,6 +560,7 @@ static const struct rte_flow_item_raw rte_flow_item_raw_mask = {
.offset = 0xffffffff,
.limit = 0xffff,
.length = 0xffff,
+ .pattern = NULL,
};
#endif
@@ -451,11 +568,17 @@ static const struct rte_flow_item_raw rte_flow_item_raw_mask = {
* RTE_FLOW_ITEM_TYPE_ETH
*
* Matches an Ethernet header.
+ *
+ * The @p type field either stands for "EtherType" or "TPID" when followed
+ * by so-called layer 2.5 pattern items such as RTE_FLOW_ITEM_TYPE_VLAN. In
+ * the latter case, @p type refers to that of the outer header, with the
+ * inner EtherType/TPID provided by the subsequent pattern item. This is the
+ * same order as on the wire.
*/
struct rte_flow_item_eth {
struct ether_addr dst; /**< Destination MAC. */
struct ether_addr src; /**< Source MAC. */
- rte_be16_t type; /**< EtherType. */
+ rte_be16_t type; /**< EtherType or TPID. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */
@@ -472,19 +595,20 @@ static const struct rte_flow_item_eth rte_flow_item_eth_mask = {
*
* Matches an 802.1Q/ad VLAN tag.
*
- * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or
- * RTE_FLOW_ITEM_TYPE_VLAN.
+ * The corresponding standard outer EtherType (TPID) values are
+ * ETHER_TYPE_VLAN or ETHER_TYPE_QINQ. It can be overridden by the preceding
+ * pattern item.
*/
struct rte_flow_item_vlan {
- rte_be16_t tpid; /**< Tag protocol identifier. */
rte_be16_t tci; /**< Tag control information. */
+ rte_be16_t inner_type; /**< Inner EtherType or TPID. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */
#ifndef __cplusplus
static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = {
- .tpid = RTE_BE16(0x0000),
- .tci = RTE_BE16(0xffff),
+ .tci = RTE_BE16(0x0fff),
+ .inner_type = RTE_BE16(0x0000),
};
#endif
@@ -514,7 +638,8 @@ static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = {
*
* Matches an IPv6 header.
*
- * Note: IPv6 options are handled by dedicated pattern items.
+ * Note: IPv6 options are handled by dedicated pattern items, see
+ * RTE_FLOW_ITEM_TYPE_IPV6_EXT.
*/
struct rte_flow_item_ipv6 {
struct ipv6_hdr hdr; /**< IPv6 header definition. */
@@ -633,9 +758,11 @@ static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = {
* RTE_FLOW_ITEM_TYPE_E_TAG.
*
* Matches a E-tag header.
+ *
+ * The corresponding standard outer EtherType (TPID) value is
+ * ETHER_TYPE_ETAG. It can be overridden by the preceding pattern item.
*/
struct rte_flow_item_e_tag {
- rte_be16_t tpid; /**< Tag protocol identifier (0x893F). */
/**
* E-Tag control information (E-TCI).
* E-PCP (3b), E-DEI (1b), ingress E-CID base (12b).
@@ -645,6 +772,7 @@ struct rte_flow_item_e_tag {
rte_be16_t rsvd_grp_ecid_b;
uint8_t in_ecid_e; /**< Ingress E-CID ext. */
uint8_t ecid_e; /**< E-CID ext. */
+ rte_be16_t inner_type; /**< Inner EtherType or TPID. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */
@@ -815,6 +943,241 @@ static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = {
#endif
/**
+ * RTE_FLOW_ITEM_TYPE_VXLAN_GPE (draft-ietf-nvo3-vxlan-gpe-05).
+ *
+ * Matches a VXLAN-GPE header.
+ */
+struct rte_flow_item_vxlan_gpe {
+ uint8_t flags; /**< Normally 0x0c (I and P flags). */
+ uint8_t rsvd0[2]; /**< Reserved, normally 0x0000. */
+ uint8_t protocol; /**< Protocol type. */
+ uint8_t vni[3]; /**< VXLAN identifier. */
+ uint8_t rsvd1; /**< Reserved, normally 0x00. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN_GPE. */
+#ifndef __cplusplus
+static const struct rte_flow_item_vxlan_gpe rte_flow_item_vxlan_gpe_mask = {
+ .vni = "\xff\xff\xff",
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4
+ *
+ * Matches an ARP header for Ethernet/IPv4.
+ */
+struct rte_flow_item_arp_eth_ipv4 {
+ rte_be16_t hrd; /**< Hardware type, normally 1. */
+ rte_be16_t pro; /**< Protocol type, normally 0x0800. */
+ uint8_t hln; /**< Hardware address length, normally 6. */
+ uint8_t pln; /**< Protocol address length, normally 4. */
+ rte_be16_t op; /**< Opcode (1 for request, 2 for reply). */
+ struct ether_addr sha; /**< Sender hardware address. */
+ rte_be32_t spa; /**< Sender IPv4 address. */
+ struct ether_addr tha; /**< Target hardware address. */
+ rte_be32_t tpa; /**< Target IPv4 address. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4. */
+#ifndef __cplusplus
+static const struct rte_flow_item_arp_eth_ipv4
+rte_flow_item_arp_eth_ipv4_mask = {
+ .sha.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .spa = RTE_BE32(0xffffffff),
+ .tha.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .tpa = RTE_BE32(0xffffffff),
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_IPV6_EXT
+ *
+ * Matches the presence of any IPv6 extension header.
+ *
+ * Normally preceded by any of:
+ *
+ * - RTE_FLOW_ITEM_TYPE_IPV6
+ * - RTE_FLOW_ITEM_TYPE_IPV6_EXT
+ */
+struct rte_flow_item_ipv6_ext {
+ uint8_t next_hdr; /**< Next header. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6_EXT. */
+#ifndef __cplusplus
+static const
+struct rte_flow_item_ipv6_ext rte_flow_item_ipv6_ext_mask = {
+ .next_hdr = 0xff,
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6
+ *
+ * Matches any ICMPv6 header.
+ */
+struct rte_flow_item_icmp6 {
+ uint8_t type; /**< ICMPv6 type. */
+ uint8_t code; /**< ICMPv6 code. */
+ uint16_t checksum; /**< ICMPv6 checksum. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6. */
+#ifndef __cplusplus
+static const struct rte_flow_item_icmp6 rte_flow_item_icmp6_mask = {
+ .type = 0xff,
+ .code = 0xff,
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS
+ *
+ * Matches an ICMPv6 neighbor discovery solicitation.
+ */
+struct rte_flow_item_icmp6_nd_ns {
+ uint8_t type; /**< ICMPv6 type, normally 135. */
+ uint8_t code; /**< ICMPv6 code, normally 0. */
+ rte_be16_t checksum; /**< ICMPv6 checksum. */
+ rte_be32_t reserved; /**< Reserved, normally 0. */
+ uint8_t target_addr[16]; /**< Target address. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS. */
+#ifndef __cplusplus
+static const
+struct rte_flow_item_icmp6_nd_ns rte_flow_item_icmp6_nd_ns_mask = {
+ .target_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA
+ *
+ * Matches an ICMPv6 neighbor discovery advertisement.
+ */
+struct rte_flow_item_icmp6_nd_na {
+ uint8_t type; /**< ICMPv6 type, normally 136. */
+ uint8_t code; /**< ICMPv6 code, normally 0. */
+ rte_be16_t checksum; /**< ICMPv6 checksum. */
+ /**
+ * Route flag (1b), solicited flag (1b), override flag (1b),
+ * reserved (29b).
+ */
+ rte_be32_t rso_reserved;
+ uint8_t target_addr[16]; /**< Target address. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA. */
+#ifndef __cplusplus
+static const
+struct rte_flow_item_icmp6_nd_na rte_flow_item_icmp6_nd_na_mask = {
+ .target_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT
+ *
+ * Matches the presence of any ICMPv6 neighbor discovery option.
+ *
+ * Normally preceded by any of:
+ *
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT
+ */
+struct rte_flow_item_icmp6_nd_opt {
+ uint8_t type; /**< ND option type. */
+ uint8_t length; /**< ND option length. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT. */
+#ifndef __cplusplus
+static const struct rte_flow_item_icmp6_nd_opt
+rte_flow_item_icmp6_nd_opt_mask = {
+ .type = 0xff,
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH
+ *
+ * Matches an ICMPv6 neighbor discovery source Ethernet link-layer address
+ * option.
+ *
+ * Normally preceded by any of:
+ *
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT
+ */
+struct rte_flow_item_icmp6_nd_opt_sla_eth {
+ uint8_t type; /**< ND option type, normally 1. */
+ uint8_t length; /**< ND option length, normally 1. */
+ struct ether_addr sla; /**< Source Ethernet LLA. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH. */
+#ifndef __cplusplus
+static const struct rte_flow_item_icmp6_nd_opt_sla_eth
+rte_flow_item_icmp6_nd_opt_sla_eth_mask = {
+ .sla.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH
+ *
+ * Matches an ICMPv6 neighbor discovery target Ethernet link-layer address
+ * option.
+ *
+ * Normally preceded by any of:
+ *
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS
+ * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT
+ */
+struct rte_flow_item_icmp6_nd_opt_tla_eth {
+ uint8_t type; /**< ND option type, normally 2. */
+ uint8_t length; /**< ND option length, normally 1. */
+ struct ether_addr tla; /**< Target Ethernet LLA. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH. */
+#ifndef __cplusplus
+static const struct rte_flow_item_icmp6_nd_opt_tla_eth
+rte_flow_item_icmp6_nd_opt_tla_eth_mask = {
+ .tla.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+};
+#endif
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ITEM_TYPE_MARK
+ *
+ * Matches an arbitrary integer value which was set using the ``MARK`` action
+ * in a previously matched rule.
+ *
+ * This item can only be specified once as a match criterion because the
+ * ``MARK`` action can only be specified once in a flow rule.
+ *
+ * This value is arbitrary and application-defined. Maximum allowed value
+ * depends on the underlying implementation.
+ *
+ * Depending on the underlying implementation, the MARK item may be supported
+ * on the physical device, with virtual groups in the PMD, or not at all.
+ */
+struct rte_flow_item_mark {
+ uint32_t id; /**< Integer value to match against. */
+};
+
+/**
* Matching pattern item definition.
*
* A pattern is formed by stacking items starting from the lowest protocol
@@ -859,33 +1222,28 @@ struct rte_flow_item {
*
* Each possible action is represented by a type. Some have associated
* configuration structures. Several actions combined in a list can be
- * affected to a flow rule. That list is not ordered.
+ * assigned to a flow rule and are performed in order.
*
* They fall in three categories:
*
- * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
- * processing matched packets by subsequent flow rules, unless overridden
- * with PASSTHRU.
- *
- * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
- * for additional processing by subsequent flow rules.
+ * - Actions that modify the fate of matching traffic, for instance by
+ * dropping or assigning it a specific destination.
*
- * - Other non terminating meta actions that do not affect the fate of
- * packets (END, VOID, MARK, FLAG, COUNT).
+ * - Actions that modify matching traffic contents or its properties. This
+ * includes adding/removing encapsulation, encryption, compression and
+ * marks.
*
- * When several actions are combined in a flow rule, they should all have
- * different types (e.g. dropping a packet twice is not possible).
+ * - Actions related to the flow rule itself, such as updating counters or
+ * making it non-terminating.
*
- * Only the last action of a given type is taken into account. PMDs still
- * perform error checking on the entire list.
+ * Flow rules being terminating by default, not specifying any action of the
+ * fate kind results in undefined behavior. This applies to both ingress and
+ * egress.
*
- * Note that PASSTHRU is the only action able to override a terminating
- * rule.
+ * PASSTHRU, when supported, makes a flow rule non-terminating.
*/
enum rte_flow_action_type {
/**
- * [META]
- *
* End marker for action lists. Prevents further processing of
* actions, thereby ending the list.
*
@@ -894,8 +1252,6 @@ enum rte_flow_action_type {
RTE_FLOW_ACTION_TYPE_END,
/**
- * [META]
- *
* Used as a placeholder for convenience. It is ignored and simply
* discarded by PMDs.
*
@@ -904,18 +1260,23 @@ enum rte_flow_action_type {
RTE_FLOW_ACTION_TYPE_VOID,
/**
- * Leaves packets up for additional processing by subsequent flow
- * rules. This is the default when a rule does not contain a
- * terminating action, but can be specified to force a rule to
- * become non-terminating.
+ * Leaves traffic up for additional processing by subsequent flow
+ * rules; makes a flow rule non-terminating.
*
* No associated configuration structure.
*/
RTE_FLOW_ACTION_TYPE_PASSTHRU,
/**
- * [META]
+ * RTE_FLOW_ACTION_TYPE_JUMP
+ *
+ * Redirects packets to a group on the current device.
*
+ * See struct rte_flow_action_jump.
+ */
+ RTE_FLOW_ACTION_TYPE_JUMP,
+
+ /**
* Attaches an integer value to packets and sets PKT_RX_FDIR and
* PKT_RX_FDIR_ID mbuf flags.
*
@@ -924,8 +1285,6 @@ enum rte_flow_action_type {
RTE_FLOW_ACTION_TYPE_MARK,
/**
- * [META]
- *
* Flags packets. Similar to MARK without a specific value; only
* sets the PKT_RX_FDIR mbuf flag.
*
@@ -950,28 +1309,16 @@ enum rte_flow_action_type {
RTE_FLOW_ACTION_TYPE_DROP,
/**
- * [META]
- *
- * Enables counters for this rule.
+ * Enables counters for this flow rule.
*
* These counters can be retrieved and reset through rte_flow_query(),
* see struct rte_flow_query_count.
*
- * No associated configuration structure.
+ * See struct rte_flow_action_count.
*/
RTE_FLOW_ACTION_TYPE_COUNT,
/**
- * Duplicates packets to a given queue index.
- *
- * This is normally combined with QUEUE, however when used alone, it
- * is actually similar to QUEUE + PASSTHRU.
- *
- * See struct rte_flow_action_dup.
- */
- RTE_FLOW_ACTION_TYPE_DUP,
-
- /**
* Similar to QUEUE, except RSS is additionally performed on packets
* to spread them among several queues according to the provided
* parameters.
@@ -981,22 +1328,37 @@ enum rte_flow_action_type {
RTE_FLOW_ACTION_TYPE_RSS,
/**
- * Redirects packets to the physical function (PF) of the current
- * device.
+ * Directs matching traffic to the physical function (PF) of the
+ * current device.
*
* No associated configuration structure.
*/
RTE_FLOW_ACTION_TYPE_PF,
/**
- * Redirects packets to the virtual function (VF) of the current
- * device with the specified ID.
+ * Directs matching traffic to a given virtual function of the
+ * current device.
*
* See struct rte_flow_action_vf.
*/
RTE_FLOW_ACTION_TYPE_VF,
/**
+ * Directs packets to a given physical port index of the underlying
+ * device.
+ *
+ * See struct rte_flow_action_phy_port.
+ */
+ RTE_FLOW_ACTION_TYPE_PHY_PORT,
+
+ /**
+ * Directs matching traffic to a given DPDK port ID.
+ *
+ * See struct rte_flow_action_port_id.
+ */
+ RTE_FLOW_ACTION_TYPE_PORT_ID,
+
+ /**
* Traffic metering and policing (MTR).
*
* See struct rte_flow_action_meter.
@@ -1010,7 +1372,139 @@ enum rte_flow_action_type {
*
* See struct rte_flow_action_security.
*/
- RTE_FLOW_ACTION_TYPE_SECURITY
+ RTE_FLOW_ACTION_TYPE_SECURITY,
+
+ /**
+ * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the
+ * OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_set_mpls_ttl.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL,
+
+ /**
+ * Implements OFPAT_DEC_MPLS_TTL ("decrement MPLS TTL") as defined
+ * by the OpenFlow Switch Specification.
+ *
+ * No associated configuration structure.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_DEC_MPLS_TTL,
+
+ /**
+ * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow
+ * Switch Specification.
+ *
+ * See struct rte_flow_action_of_set_nw_ttl.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL,
+
+ /**
+ * Implements OFPAT_DEC_NW_TTL ("decrement IP TTL") as defined by
+ * the OpenFlow Switch Specification.
+ *
+ * No associated configuration structure.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_DEC_NW_TTL,
+
+ /**
+ * Implements OFPAT_COPY_TTL_OUT ("copy TTL "outwards" -- from
+ * next-to-outermost to outermost") as defined by the OpenFlow
+ * Switch Specification.
+ *
+ * No associated configuration structure.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_OUT,
+
+ /**
+ * Implements OFPAT_COPY_TTL_IN ("copy TTL "inwards" -- from
+ * outermost to next-to-outermost") as defined by the OpenFlow
+ * Switch Specification.
+ *
+ * No associated configuration structure.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_IN,
+
+ /**
+ * Implements OFPAT_POP_VLAN ("pop the outer VLAN tag") as defined
+ * by the OpenFlow Switch Specification.
+ *
+ * No associated configuration structure.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_POP_VLAN,
+
+ /**
+ * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by
+ * the OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_push_vlan.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN,
+
+ /**
+ * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as
+ * defined by the OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_set_vlan_vid.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
+
+ /**
+ * Implements OFPAT_SET_VLAN_PCP ("set the 802.1q priority") as
+ * defined by the OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_set_vlan_pcp.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
+
+ /**
+ * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined
+ * by the OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_pop_mpls.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_POP_MPLS,
+
+ /**
+ * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by
+ * the OpenFlow Switch Specification.
+ *
+ * See struct rte_flow_action_of_push_mpls.
+ */
+ RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS,
+
+ /**
+ * Encapsulate flow in VXLAN tunnel as defined in
+ * rte_flow_action_vxlan_encap action structure.
+ *
+ * See struct rte_flow_action_vxlan_encap.
+ */
+ RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP,
+
+ /**
+ * Decapsulate outer most VXLAN tunnel from matched flow.
+ *
+ * If flow pattern does not define a valid VXLAN tunnel (as specified by
+ * RFC7348) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION
+ * error.
+ */
+ RTE_FLOW_ACTION_TYPE_VXLAN_DECAP,
+
+ /**
+ * Encapsulate flow in NVGRE tunnel defined in the
+ * rte_flow_action_nvgre_encap action structure.
+ *
+ * See struct rte_flow_action_nvgre_encap.
+ */
+ RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP,
+
+ /**
+ * Decapsulate outer most NVGRE tunnel from matched flow.
+ *
+ * If flow pattern does not define a valid NVGRE tunnel (as specified by
+ * RFC7637) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION
+ * error.
+ */
+ RTE_FLOW_ACTION_TYPE_NVGRE_DECAP,
};
/**
@@ -1028,16 +1522,62 @@ struct rte_flow_action_mark {
};
/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_JUMP
+ *
+ * Redirects packets to a group on the current device.
+ *
+ * In a hierarchy of groups, which can be used to represent physical or logical
+ * flow tables on the device, this action redirects matching traffic to another
+ * group on that device.
+ */
+struct rte_flow_action_jump {
+ uint32_t group;
+};
+
+/**
* RTE_FLOW_ACTION_TYPE_QUEUE
*
* Assign packets to a given queue index.
- *
- * Terminating by default.
*/
struct rte_flow_action_queue {
uint16_t index; /**< Queue index to use. */
};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * Adds a counter action to a matched flow.
+ *
+ * If more than one count action is specified in a single flow rule, then each
+ * action must specify a unique id.
+ *
+ * Counters can be retrieved and reset through ``rte_flow_query()``, see
+ * ``struct rte_flow_query_count``.
+ *
+ * The shared flag indicates whether the counter is unique to the flow rule the
+ * action is specified with, or whether it is a shared counter.
+ *
+ * For a count action with the shared flag set, a global device namespace is
+ * assumed for the counter id, so that any matched flow rules using a count
+ * action with the same counter id on the same port will contribute to that
+ * counter.
+ *
+ * For ports within the same switch domain, the counter id namespace extends
+ * to all ports within that switch domain.
+ */
+struct rte_flow_action_count {
+ uint32_t shared:1; /**< Share counter ID with other flow rules. */
+ uint32_t reserved:31; /**< Reserved, must be zero. */
+ uint32_t id; /**< Counter ID. */
+};
+
/**
* RTE_FLOW_ACTION_TYPE_COUNT (query)
*
@@ -1053,54 +1593,99 @@ struct rte_flow_query_count {
};
/**
- * RTE_FLOW_ACTION_TYPE_DUP
- *
- * Duplicates packets to a given queue index.
- *
- * This is normally combined with QUEUE, however when used alone, it is
- * actually similar to QUEUE + PASSTHRU.
- *
- * Non-terminating by default.
- */
-struct rte_flow_action_dup {
- uint16_t index; /**< Queue index to duplicate packets to. */
-};
-
-/**
* RTE_FLOW_ACTION_TYPE_RSS
*
* Similar to QUEUE, except RSS is additionally performed on packets to
* spread them among several queues according to the provided parameters.
*
+ * Unlike global RSS settings used by other DPDK APIs, unsetting the
+ * @p types field does not disable RSS in a flow rule. Doing so instead
+ * requests safe unspecified "best-effort" settings from the underlying PMD,
+ * which depending on the flow rule, may result in anything ranging from
+ * empty (single queue) to all-inclusive RSS.
+ *
* Note: RSS hash result is stored in the hash.rss mbuf field which overlaps
* hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only,
* both can be requested simultaneously.
- *
- * Terminating by default.
*/
struct rte_flow_action_rss {
- const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */
- uint16_t num; /**< Number of entries in queue[]. */
- uint16_t queue[]; /**< Queues indices to use. */
+ enum rte_eth_hash_function func; /**< RSS hash function to apply. */
+ /**
+ * Packet encapsulation level RSS hash @p types apply to.
+ *
+ * - @p 0 requests the default behavior. Depending on the packet
+ * type, it can mean outermost, innermost, anything in between or
+ * even no RSS.
+ *
+ * It basically stands for the innermost encapsulation level RSS
+ * can be performed on according to PMD and device capabilities.
+ *
+ * - @p 1 requests RSS to be performed on the outermost packet
+ * encapsulation level.
+ *
+ * - @p 2 and subsequent values request RSS to be performed on the
+ * specified inner packet encapsulation level, from outermost to
+ * innermost (lower to higher values).
+ *
+ * Values other than @p 0 are not necessarily supported.
+ *
+ * Requesting a specific RSS level on unrecognized traffic results
+ * in undefined behavior. For predictable results, it is recommended
+ * to make the flow rule pattern match packet headers up to the
+ * requested encapsulation level so that only matching traffic goes
+ * through.
+ */
+ uint32_t level;
+ uint64_t types; /**< Specific RSS hash types (see ETH_RSS_*). */
+ uint32_t key_len; /**< Hash key length in bytes. */
+ uint32_t queue_num; /**< Number of entries in @p queue. */
+ const uint8_t *key; /**< Hash key. */
+ const uint16_t *queue; /**< Queue indices to use. */
};
/**
* RTE_FLOW_ACTION_TYPE_VF
*
- * Redirects packets to a virtual function (VF) of the current device.
+ * Directs matching traffic to a given virtual function of the current
+ * device.
*
* Packets matched by a VF pattern item can be redirected to their original
* VF ID instead of the specified one. This parameter may not be available
* and is not guaranteed to work properly if the VF part is matched by a
* prior flow rule or if packets are not addressed to a VF in the first
* place.
- *
- * Terminating by default.
*/
struct rte_flow_action_vf {
uint32_t original:1; /**< Use original VF ID if possible. */
uint32_t reserved:31; /**< Reserved, must be zero. */
- uint32_t id; /**< VF ID to redirect packets to. */
+ uint32_t id; /**< VF ID. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_PHY_PORT
+ *
+ * Directs packets to a given physical port index of the underlying
+ * device.
+ *
+ * @see RTE_FLOW_ITEM_TYPE_PHY_PORT
+ */
+struct rte_flow_action_phy_port {
+ uint32_t original:1; /**< Use original port index if possible. */
+ uint32_t reserved:31; /**< Reserved, must be zero. */
+ uint32_t index; /**< Physical port index. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_PORT_ID
+ *
+ * Directs matching traffic to a given DPDK port ID.
+ *
+ * @see RTE_FLOW_ITEM_TYPE_PORT_ID
+ */
+struct rte_flow_action_port_id {
+ uint32_t original:1; /**< Use original DPDK port ID if possible. */
+ uint32_t reserved:31; /**< Reserved, must be zero. */
+ uint32_t id; /**< DPDK port ID. */
};
/**
@@ -1110,8 +1695,6 @@ struct rte_flow_action_vf {
*
* Packets matched by items of this type can be either dropped or passed to the
* next item with their color set by the MTR object.
- *
- * Non-terminating by default.
*/
struct rte_flow_action_meter {
uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */
@@ -1141,14 +1724,151 @@ struct rte_flow_action_meter {
* direction.
*
* Multiple flows can be configured to use the same security session.
- *
- * Non-terminating by default.
*/
struct rte_flow_action_security {
void *security_session; /**< Pointer to security session structure. */
};
/**
+ * RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL
+ *
+ * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the OpenFlow
+ * Switch Specification.
+ */
+struct rte_flow_action_of_set_mpls_ttl {
+ uint8_t mpls_ttl; /**< MPLS TTL. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL
+ *
+ * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow Switch
+ * Specification.
+ */
+struct rte_flow_action_of_set_nw_ttl {
+ uint8_t nw_ttl; /**< IP TTL. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN
+ *
+ * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by the
+ * OpenFlow Switch Specification.
+ */
+struct rte_flow_action_of_push_vlan {
+ rte_be16_t ethertype; /**< EtherType. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
+ *
+ * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as defined by
+ * the OpenFlow Switch Specification.
+ */
+struct rte_flow_action_of_set_vlan_vid {
+ rte_be16_t vlan_vid; /**< VLAN id. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
+ *
+ * Implements OFPAT_SET_VLAN_PCP ("set the 802.1q priority") as defined by
+ * the OpenFlow Switch Specification.
+ */
+struct rte_flow_action_of_set_vlan_pcp {
+ uint8_t vlan_pcp; /**< VLAN priority. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_POP_MPLS
+ *
+ * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined by the
+ * OpenFlow Switch Specification.
+ */
+struct rte_flow_action_of_pop_mpls {
+ rte_be16_t ethertype; /**< EtherType. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS
+ *
+ * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by the
+ * OpenFlow Switch Specification.
+ */
+struct rte_flow_action_of_push_mpls {
+ rte_be16_t ethertype; /**< EtherType. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
+ *
+ * VXLAN tunnel end-point encapsulation data definition
+ *
+ * The tunnel definition is provided through the flow item pattern; the
+ * provided pattern must conform to RFC7348 for the tunnel specified. The flow
+ * definition must be provided in order, from the RTE_FLOW_ITEM_TYPE_ETH
+ * definition up to the end item, which is specified by RTE_FLOW_ITEM_TYPE_END.
+ *
+ * The mask field allows user to specify which fields in the flow item
+ * definitions can be ignored and which have valid data and can be used
+ * verbatim.
+ *
+ * Note: the last field is not used in the definition of a tunnel and can be
+ * ignored.
+ *
+ * Valid flow definition for RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP include:
+ *
+ * - ETH / IPV4 / UDP / VXLAN / END
+ * - ETH / IPV6 / UDP / VXLAN / END
+ * - ETH / VLAN / IPV4 / UDP / VXLAN / END
+ *
+ */
+struct rte_flow_action_vxlan_encap {
+ /**
+	 * Encapsulating VXLAN tunnel definition
+ * (terminated by the END pattern item).
+ */
+ struct rte_flow_item *definition;
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP
+ *
+ * NVGRE tunnel end-point encapsulation data definition
+ *
+ * The tunnel definition is provided through the flow item pattern; the
+ * provided pattern must conform to RFC7637. The flow definition must be
+ * provided in order, from the RTE_FLOW_ITEM_TYPE_ETH definition up to the end
+ * item, which is specified by RTE_FLOW_ITEM_TYPE_END.
+ *
+ * The mask field allows user to specify which fields in the flow item
+ * definitions can be ignored and which have valid data and can be used
+ * verbatim.
+ *
+ * Note: the last field is not used in the definition of a tunnel and can be
+ * ignored.
+ *
+ * Valid flow definition for RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP include:
+ *
+ * - ETH / IPV4 / NVGRE / END
+ * - ETH / VLAN / IPV6 / NVGRE / END
+ *
+ */
+struct rte_flow_action_nvgre_encap {
+ /**
+	 * Encapsulating NVGRE tunnel definition
+ * (terminated by the END pattern item).
+ */
+ struct rte_flow_item *definition;
+};
+
+/*
* Definition of a single action.
*
* A list of actions is terminated by a END action.
@@ -1182,10 +1902,15 @@ enum rte_flow_error_type {
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */
RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */
RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
+ RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, /**< Transfer field. */
RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, /**< Item specification. */
+ RTE_FLOW_ERROR_TYPE_ITEM_LAST, /**< Item specification range. */
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK, /**< Item specification mask. */
RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF, /**< Action configuration. */
RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
};
@@ -1351,7 +2076,7 @@ rte_flow_flush(uint16_t port_id,
* @param flow
* Flow rule handle to query.
* @param action
- * Action type to query.
+ * Action definition as defined in original flow rule.
* @param[in, out] data
* Pointer to storage for the associated query data type.
* @param[out] error
@@ -1364,7 +2089,7 @@ rte_flow_flush(uint16_t port_id,
int
rte_flow_query(uint16_t port_id,
struct rte_flow *flow,
- enum rte_flow_action_type action,
+ const struct rte_flow_action *action,
void *data,
struct rte_flow_error *error);
diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
index 7778c8e0..688f7230 100644
--- a/lib/librte_ether/rte_flow_driver.h
+++ b/lib/librte_ethdev/rte_flow_driver.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2016 6WIND S.A.
- * Copyright 2016 Mellanox.
+ * Copyright 2016 Mellanox Technologies, Ltd
*/
#ifndef RTE_FLOW_DRIVER_H_
@@ -88,7 +88,7 @@ struct rte_flow_ops {
int (*query)
(struct rte_eth_dev *,
struct rte_flow *,
- enum rte_flow_action_type,
+ const struct rte_flow_action *,
void *,
struct rte_flow_error *);
/** See rte_flow_isolate(). */
@@ -114,6 +114,69 @@ struct rte_flow_ops {
const struct rte_flow_ops *
rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
+/** Helper macro to build input graph for rte_flow_expand_rss(). */
+#define RTE_FLOW_EXPAND_RSS_NEXT(...) \
+ (const int []){ \
+ __VA_ARGS__, 0, \
+ }
+
+/** Node object of input graph for rte_flow_expand_rss(). */
+struct rte_flow_expand_node {
+ const int *const next;
+ /**<
+ * List of next node indexes. Index 0 is interpreted as a terminator.
+ */
+ const enum rte_flow_item_type type;
+ /**< Pattern item type of current node. */
+ uint64_t rss_types;
+ /**<
+ * RSS types bit-field associated with this node
+ * (see ETH_RSS_* definitions).
+ */
+};
+
+/** Object returned by rte_flow_expand_rss(). */
+struct rte_flow_expand_rss {
+ uint32_t entries;
+	/**< Number of entries in @p entry[]. */
+ struct {
+ struct rte_flow_item *pattern; /**< Expanded pattern array. */
+ uint32_t priority; /**< Priority offset for each expansion. */
+ } entry[];
+};
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ *
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @param[out] buf
+ * Buffer to store the result expansion.
+ * @param[in] size
+ * Buffer size in bytes. If 0, @p buf can be NULL.
+ * @param[in] pattern
+ * User flow pattern.
+ * @param[in] types
+ * RSS types to expand (see ETH_RSS_* definitions).
+ * @param[in] graph
+ * Input graph to expand @p pattern according to @p types.
+ * @param[in] graph_root_index
+ * Index of root node in @p graph, typically 0.
+ *
+ * @return
+ * A positive value representing the size of @p buf in bytes regardless of
+ * @p size on success, a negative errno value otherwise and rte_errno is
+ * set, the following errors are defined:
+ *
+ *   -E2BIG: the expansion depth of @p graph is too deep.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_mtr.c b/lib/librte_ethdev/rte_mtr.c
index 1046cb5f..1046cb5f 100644
--- a/lib/librte_ether/rte_mtr.c
+++ b/lib/librte_ethdev/rte_mtr.c
diff --git a/lib/librte_ether/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h
index c4819b27..c4819b27 100644
--- a/lib/librte_ether/rte_mtr.h
+++ b/lib/librte_ethdev/rte_mtr.h
diff --git a/lib/librte_ether/rte_mtr_driver.h b/lib/librte_ethdev/rte_mtr_driver.h
index c9a6d7c3..c9a6d7c3 100644
--- a/lib/librte_ether/rte_mtr_driver.h
+++ b/lib/librte_ethdev/rte_mtr_driver.h
diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ethdev/rte_tm.c
index 9709454f..9709454f 100644
--- a/lib/librte_ether/rte_tm.c
+++ b/lib/librte_ethdev/rte_tm.c
diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ethdev/rte_tm.h
index 2b25a871..955f02ff 100644
--- a/lib/librte_ether/rte_tm.h
+++ b/lib/librte_ethdev/rte_tm.h
@@ -377,6 +377,22 @@ struct rte_tm_capabilities {
*/
uint32_t sched_wfq_weight_max;
+ /** WRED packet mode support. When non-zero, this parameter indicates
+	 * that there is at least one leaf node that supports the WRED packet
+ * mode, which might not be true for all the leaf nodes. In packet
+ * mode, the WRED thresholds specify the queue length in packets, as
+ * opposed to bytes.
+ */
+ int cman_wred_packet_mode_supported;
+
+ /** WRED byte mode support. When non-zero, this parameter indicates that
+	 * there is at least one leaf node that supports the WRED byte mode,
+ * which might not be true for all the leaf nodes. In byte mode, the
+ * WRED thresholds specify the queue length in bytes, as opposed to
+ * packets.
+ */
+ int cman_wred_byte_mode_supported;
+
/** Head drop algorithm support. When non-zero, this parameter
* indicates that there is at least one leaf node that supports the head
* drop algorithm, which might not be true for all the leaf nodes.
@@ -628,6 +644,24 @@ struct rte_tm_level_capabilities {
*/
uint32_t shaper_shared_n_max;
+ /** WRED packet mode support. When non-zero, this
+	 * parameter indicates that there is at least one leaf
+ * node on this level that supports the WRED packet
+ * mode, which might not be true for all the leaf
+ * nodes. In packet mode, the WRED thresholds specify
+ * the queue length in packets, as opposed to bytes.
+ */
+ int cman_wred_packet_mode_supported;
+
+ /** WRED byte mode support. When non-zero, this
+	 * parameter indicates that there is at least one leaf
+ * node on this level that supports the WRED byte mode,
+ * which might not be true for all the leaf nodes. In
+ * byte mode, the WRED thresholds specify the queue
+ * length in bytes, as opposed to packets.
+ */
+ int cman_wred_byte_mode_supported;
+
/** Head drop algorithm support. When non-zero, this
* parameter indicates that there is at least one leaf
* node on this level that supports the head drop
@@ -743,6 +777,12 @@ struct rte_tm_node_capabilities {
/** Items valid only for leaf nodes. */
struct {
+ /** WRED packet mode support for current node. */
+ int cman_wred_packet_mode_supported;
+
+ /** WRED byte mode support for current node. */
+ int cman_wred_byte_mode_supported;
+
/** Head drop algorithm support for current node. */
int cman_head_drop_supported;
@@ -791,10 +831,10 @@ enum rte_tm_cman_mode {
*/
struct rte_tm_red_params {
/** Minimum queue threshold */
- uint16_t min_th;
+ uint32_t min_th;
/** Maximum queue threshold */
- uint16_t max_th;
+ uint32_t max_th;
/** Inverse of packet marking probability maximum value (maxp), i.e.
* maxp_inv = 1 / maxp
@@ -815,10 +855,19 @@ struct rte_tm_red_params {
* WRED context is used to perform congestion management for a single leaf
* node, while a shared WRED context is used to perform congestion management
* for a group of leaf nodes.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_packet_mode_supported
+ * @see struct rte_tm_capabilities::cman_wred_byte_mode_supported
*/
struct rte_tm_wred_params {
/** One set of RED parameters per packet color */
struct rte_tm_red_params red_params[RTE_TM_COLORS];
+
+ /** When non-zero, the *min_th* and *max_th* thresholds are specified
+ * in packets (WRED packet mode). When zero, the *min_th* and *max_th*
+	 * thresholds are specified in bytes (WRED byte mode).
+ */
+ int packet_mode;
};
/**
@@ -1520,6 +1569,10 @@ rte_tm_hierarchy_commit(uint16_t port_id,
/**
* Traffic manager node parent update
*
+ * This function may be used to move a node and its children to a different
+ * parent. Additionally, if the new parent is the same as the current parent,
+ * this function will update the priority/weight of an existing node.
+ *
* Restriction for root node: its parent cannot be changed.
*
* This function can only be called after the rte_tm_hierarchy_commit()
@@ -1643,7 +1696,7 @@ rte_tm_node_stats_update(uint16_t port_id,
* @param[in] port_id
* The port identifier of the Ethernet device.
* @param[in] node_id
- * Node ID. Needs to be valid leaf node ID.
+ * Node ID. Needs to be valid non-leaf node ID.
* @param[in] wfq_weight_mode
* WFQ weight mode for each SP priority. When NULL, it indicates that WFQ is
* to be used for all priorities. When non-NULL, it points to a pre-allocated
diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ethdev/rte_tm_driver.h
index 90114ff5..90114ff5 100644
--- a/lib/librte_ether/rte_tm_driver.h
+++ b/lib/librte_ethdev/rte_tm_driver.h
diff --git a/lib/librte_ether/rte_ethdev_driver.h b/lib/librte_ether/rte_ethdev_driver.h
deleted file mode 100644
index 45f08c65..00000000
--- a/lib/librte_ether/rte_ethdev_driver.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2017 Intel Corporation
- */
-
-#ifndef _RTE_ETHDEV_DRIVER_H_
-#define _RTE_ETHDEV_DRIVER_H_
-
-/**
- * @file
- *
- * RTE Ethernet Device PMD API
- *
- * These APIs for the use from Ethernet drivers, user applications shouldn't
- * use them.
- *
- */
-
-#include <rte_ethdev.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @internal
- * Returns a ethdev slot specified by the unique identifier name.
- *
- * @param name
- * The pointer to the Unique identifier name for each Ethernet device
- * @return
- * - The pointer to the ethdev slot, on success. NULL on error
- */
-struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
-
-/**
- * @internal
- * Allocates a new ethdev slot for an ethernet device and returns the pointer
- * to that slot for the driver to use.
- *
- * @param name Unique identifier name for each Ethernet device
- * @param type Device type of this Ethernet device
- * @return
- * - Slot in the rte_dev_devices array for a new device;
- */
-struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
-
-/**
- * @internal
- * Attach to the ethdev already initialized by the primary
- * process.
- *
- * @param name Ethernet device's name.
- * @return
- * - Success: Slot in the rte_dev_devices array for attached
- * device.
- * - Error: Null pointer.
- */
-struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name);
-
-/**
- * @internal
- * Release the specified ethdev port.
- *
- * @param eth_dev
- * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
- * @return
- * - 0 on success, negative on error
- */
-int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
-
-/**
- * @internal
- * Release device queues and clear its configuration to force the user
- * application to reconfigure it. It is for internal use only.
- *
- * @param dev
- * Pointer to struct rte_eth_dev.
- *
- * @return
- * void
- */
-void _rte_eth_dev_reset(struct rte_eth_dev *dev);
-
-/**
- * @internal Executes all the user application registered callbacks for
- * the specific device. It is for DPDK internal user only. User
- * application should not call it directly.
- *
- * @param dev
- * Pointer to struct rte_eth_dev.
- * @param event
- * Eth device interrupt event type.
- * @param ret_param
- * To pass data back to user application.
- * This allows the user application to decide if a particular function
- * is permitted or not.
- *
- * @return
- * int
- */
-int _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
- enum rte_eth_event_type event, void *ret_param);
-
-/**
- * Create memzone for HW rings.
- * malloc can't be used as the physical address is needed.
- * If the memzone is already created, then this function returns a ptr
- * to the old one.
- *
- * @param eth_dev
- * The *eth_dev* pointer is the address of the *rte_eth_dev* structure
- * @param name
- * The name of the memory zone
- * @param queue_id
- * The index of the queue to add to name
- * @param size
- * The sizeof of the memory area
- * @param align
- * Alignment for resulting memzone. Must be a power of 2.
- * @param socket_id
- * The *socket_id* argument is the socket identifier in case of NUMA.
- */
-const struct rte_memzone *
-rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
- uint16_t queue_id, size_t size,
- unsigned align, int socket_id);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_ETHDEV_DRIVER_H_ */
diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile
index d27dd070..47f599a6 100644
--- a/lib/librte_eventdev/Makefile
+++ b/lib/librte_eventdev/Makefile
@@ -8,18 +8,26 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eventdev.a
# library version
-LIBABIVER := 3
+LIBABIVER := 5
# build flags
CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
-LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+CFLAGS += -DLINUX
+else
+CFLAGS += -DBSD
+endif
+LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash -lrte_mempool -lrte_timer
+LDLIBS += -lrte_mbuf -lrte_cryptodev -lpthread
# library source files
SRCS-y += rte_eventdev.c
SRCS-y += rte_event_ring.c
SRCS-y += rte_event_eth_rx_adapter.c
+SRCS-y += rte_event_timer_adapter.c
+SRCS-y += rte_event_crypto_adapter.c
# export include files
SYMLINK-y-include += rte_eventdev.h
@@ -28,6 +36,9 @@ SYMLINK-y-include += rte_eventdev_pmd_pci.h
SYMLINK-y-include += rte_eventdev_pmd_vdev.h
SYMLINK-y-include += rte_event_ring.h
SYMLINK-y-include += rte_event_eth_rx_adapter.h
+SYMLINK-y-include += rte_event_timer_adapter.h
+SYMLINK-y-include += rte_event_timer_adapter_pmd.h
+SYMLINK-y-include += rte_event_crypto_adapter.h
# versioning export map
EXPORT_MAP := rte_eventdev_version.map
diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build
index d1a99602..3cbaf298 100644
--- a/lib/librte_eventdev/meson.build
+++ b/lib/librte_eventdev/meson.build
@@ -1,14 +1,27 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+version = 5
allow_experimental_apis = true
+
+if host_machine.system() == 'linux'
+ cflags += '-DLINUX'
+else
+ cflags += '-DBSD'
+endif
+
sources = files('rte_eventdev.c',
'rte_event_ring.c',
- 'rte_event_eth_rx_adapter.c')
+ 'rte_event_eth_rx_adapter.c',
+ 'rte_event_timer_adapter.c',
+ 'rte_event_crypto_adapter.c')
headers = files('rte_eventdev.h',
'rte_eventdev_pmd.h',
'rte_eventdev_pmd_pci.h',
'rte_eventdev_pmd_vdev.h',
'rte_event_ring.h',
- 'rte_event_eth_rx_adapter.h')
-deps += ['ring', 'ethdev', 'hash']
+ 'rte_event_eth_rx_adapter.h',
+ 'rte_event_timer_adapter.h',
+ 'rte_event_timer_adapter_pmd.h',
+ 'rte_event_crypto_adapter.h')
+deps += ['ring', 'ethdev', 'hash', 'mempool', 'mbuf', 'timer', 'cryptodev']
diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.c b/lib/librte_eventdev/rte_event_crypto_adapter.c
new file mode 100644
index 00000000..11b28ca9
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_crypto_adapter.c
@@ -0,0 +1,1128 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <string.h>
+#include <stdbool.h>
+#include <rte_common.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_service_component.h>
+
+#include "rte_eventdev.h"
+#include "rte_eventdev_pmd.h"
+#include "rte_event_crypto_adapter.h"
+
+#define BATCH_SIZE 32
+#define DEFAULT_MAX_NB 128
+#define CRYPTO_ADAPTER_NAME_LEN 32
+#define CRYPTO_ADAPTER_MEM_NAME_LEN 32
+#define CRYPTO_ADAPTER_MAX_EV_ENQ_RETRIES 100
+
+/* Flush an instance's enqueue buffers every CRYPTO_ENQ_FLUSH_THRESHOLD
+ * iterations of eca_crypto_adapter_enq_run()
+ */
+#define CRYPTO_ENQ_FLUSH_THRESHOLD 1024
+
+struct rte_event_crypto_adapter {
+ /* Event device identifier */
+ uint8_t eventdev_id;
+ /* Event port identifier */
+ uint8_t event_port_id;
+ /* Store event device's implicit release capability */
+ uint8_t implicit_release_disabled;
+ /* Max crypto ops processed in any service function invocation */
+ uint32_t max_nb;
+ /* Lock to serialize config updates with service function */
+ rte_spinlock_t lock;
+ /* Next crypto device to be processed */
+ uint16_t next_cdev_id;
+ /* Per crypto device structure */
+ struct crypto_device_info *cdevs;
+ /* Loop counter to flush crypto ops */
+ uint16_t transmit_loop_count;
+ /* Per instance stats structure */
+ struct rte_event_crypto_adapter_stats crypto_stats;
+ /* Configuration callback for rte_service configuration */
+ rte_event_crypto_adapter_conf_cb conf_cb;
+ /* Configuration callback argument */
+ void *conf_arg;
+ /* Set if default_cb is being used */
+ int default_cb_arg;
+ /* Service initialization state */
+ uint8_t service_inited;
+ /* Memory allocation name */
+ char mem_name[CRYPTO_ADAPTER_MEM_NAME_LEN];
+ /* Socket identifier cached from eventdev */
+ int socket_id;
+ /* Per adapter EAL service */
+ uint32_t service_id;
+ /* No. of queue pairs configured */
+ uint16_t nb_qps;
+ /* Adapter mode */
+ enum rte_event_crypto_adapter_mode mode;
+} __rte_cache_aligned;
+
+/* Per crypto device information */
+struct crypto_device_info {
+ /* Pointer to cryptodev */
+ struct rte_cryptodev *dev;
+ /* Pointer to queue pair info */
+ struct crypto_queue_pair_info *qpairs;
+ /* Next queue pair to be processed */
+ uint16_t next_queue_pair_id;
+ /* Set to indicate cryptodev->eventdev packet
+ * transfer uses a hardware mechanism
+ */
+ uint8_t internal_event_port;
+ /* Set to indicate processing has been started */
+ uint8_t dev_started;
+ /* If num_qpairs > 0, the start callback will
+ * be invoked if not already invoked
+ */
+ uint16_t num_qpairs;
+} __rte_cache_aligned;
+
+/* Per queue pair information */
+struct crypto_queue_pair_info {
+ /* Set to indicate queue pair is enabled */
+ bool qp_enabled;
+ /* Pointer to hold rte_crypto_ops for batching */
+ struct rte_crypto_op **op_buffer;
+ /* No of crypto ops accumulated */
+ uint8_t len;
+} __rte_cache_aligned;
+
+static struct rte_event_crypto_adapter **event_crypto_adapter;
+
+/* Macros to check for valid adapter */
+#define EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \
+ if (!eca_valid_id(id)) { \
+ RTE_EDEV_LOG_ERR("Invalid crypto adapter id = %d\n", id); \
+ return retval; \
+ } \
+} while (0)
+
+static inline int
+eca_valid_id(uint8_t id)
+{
+ return id < RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE;
+}
+
+static int
+eca_init(void)
+{
+ const char *name = "crypto_adapter_array";
+ const struct rte_memzone *mz;
+ unsigned int sz;
+
+ sz = sizeof(*event_crypto_adapter) *
+ RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE;
+ sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE);
+
+ mz = rte_memzone_lookup(name);
+ if (mz == NULL) {
+ mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0,
+ RTE_CACHE_LINE_SIZE);
+ if (mz == NULL) {
+ RTE_EDEV_LOG_ERR("failed to reserve memzone err = %"
+ PRId32, rte_errno);
+ return -rte_errno;
+ }
+ }
+
+ event_crypto_adapter = mz->addr;
+ return 0;
+}
+
+static inline struct rte_event_crypto_adapter *
+eca_id_to_adapter(uint8_t id)
+{
+ return event_crypto_adapter ?
+ event_crypto_adapter[id] : NULL;
+}
+
+static int
+eca_default_config_cb(uint8_t id, uint8_t dev_id,
+ struct rte_event_crypto_adapter_conf *conf, void *arg)
+{
+ struct rte_event_dev_config dev_conf;
+ struct rte_eventdev *dev;
+ uint8_t port_id;
+ int started;
+ int ret;
+ struct rte_event_port_conf *port_conf = arg;
+ struct rte_event_crypto_adapter *adapter = eca_id_to_adapter(id);
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+ dev_conf = dev->data->dev_conf;
+
+ started = dev->data->dev_started;
+ if (started)
+ rte_event_dev_stop(dev_id);
+ port_id = dev_conf.nb_event_ports;
+ dev_conf.nb_event_ports += 1;
+ ret = rte_event_dev_configure(dev_id, &dev_conf);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to configure event dev %u\n", dev_id);
+ if (started) {
+ if (rte_event_dev_start(dev_id))
+ return -EIO;
+ }
+ return ret;
+ }
+
+ ret = rte_event_port_setup(dev_id, port_id, port_conf);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to setup event port %u\n", port_id);
+ return ret;
+ }
+
+ conf->event_port_id = port_id;
+ conf->max_nb = DEFAULT_MAX_NB;
+ if (started)
+ ret = rte_event_dev_start(dev_id);
+
+ adapter->default_cb_arg = 1;
+ return ret;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_create_ext(uint8_t id, uint8_t dev_id,
+ rte_event_crypto_adapter_conf_cb conf_cb,
+ enum rte_event_crypto_adapter_mode mode,
+ void *conf_arg)
+{
+ struct rte_event_crypto_adapter *adapter;
+ char mem_name[CRYPTO_ADAPTER_NAME_LEN];
+ struct rte_event_dev_info dev_info;
+ int socket_id;
+ uint8_t i;
+ int ret;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ if (conf_cb == NULL)
+ return -EINVAL;
+
+ if (event_crypto_adapter == NULL) {
+ ret = eca_init();
+ if (ret)
+ return ret;
+ }
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter != NULL) {
+ RTE_EDEV_LOG_ERR("Crypto adapter id %u already exists!", id);
+ return -EEXIST;
+ }
+
+ socket_id = rte_event_dev_socket_id(dev_id);
+ snprintf(mem_name, CRYPTO_ADAPTER_MEM_NAME_LEN,
+ "rte_event_crypto_adapter_%d", id);
+
+ adapter = rte_zmalloc_socket(mem_name, sizeof(*adapter),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (adapter == NULL) {
+ RTE_EDEV_LOG_ERR("Failed to get mem for event crypto adapter!");
+ return -ENOMEM;
+ }
+
+ ret = rte_event_dev_info_get(dev_id, &dev_info);
+ if (ret < 0) {
+ RTE_EDEV_LOG_ERR("Failed to get info for eventdev %d: %s!",
+ dev_id, dev_info.driver_name);
+		rte_free(adapter);
+		return ret;
+ }
+
+ adapter->implicit_release_disabled = (dev_info.event_dev_cap &
+ RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE);
+ adapter->eventdev_id = dev_id;
+ adapter->socket_id = socket_id;
+ adapter->conf_cb = conf_cb;
+ adapter->conf_arg = conf_arg;
+ adapter->mode = mode;
+ strcpy(adapter->mem_name, mem_name);
+ adapter->cdevs = rte_zmalloc_socket(adapter->mem_name,
+ rte_cryptodev_count() *
+ sizeof(struct crypto_device_info), 0,
+ socket_id);
+ if (adapter->cdevs == NULL) {
+ RTE_EDEV_LOG_ERR("Failed to get mem for crypto devices\n");
+ rte_free(adapter);
+ return -ENOMEM;
+ }
+
+ rte_spinlock_init(&adapter->lock);
+ for (i = 0; i < rte_cryptodev_count(); i++)
+ adapter->cdevs[i].dev = rte_cryptodev_pmd_get_dev(i);
+
+ event_crypto_adapter[id] = adapter;
+
+ return 0;
+}
+
+
+int __rte_experimental
+rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id,
+ struct rte_event_port_conf *port_config,
+ enum rte_event_crypto_adapter_mode mode)
+{
+ struct rte_event_port_conf *pc;
+ int ret;
+
+ if (port_config == NULL)
+ return -EINVAL;
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ pc = rte_malloc(NULL, sizeof(*pc), 0);
+ if (pc == NULL)
+ return -ENOMEM;
+ *pc = *port_config;
+ ret = rte_event_crypto_adapter_create_ext(id, dev_id,
+ eca_default_config_cb,
+ mode,
+ pc);
+ if (ret)
+ rte_free(pc);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_free(uint8_t id)
+{
+ struct rte_event_crypto_adapter *adapter;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ if (adapter->nb_qps) {
+		RTE_EDEV_LOG_ERR("%" PRIu16 " queue pairs not deleted",
+ adapter->nb_qps);
+ return -EBUSY;
+ }
+
+ if (adapter->default_cb_arg)
+ rte_free(adapter->conf_arg);
+ rte_free(adapter->cdevs);
+ rte_free(adapter);
+ event_crypto_adapter[id] = NULL;
+
+ return 0;
+}
+
+static inline unsigned int
+eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter,
+ struct rte_event *ev, unsigned int cnt)
+{
+ struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats;
+ union rte_event_crypto_metadata *m_data = NULL;
+ struct crypto_queue_pair_info *qp_info = NULL;
+ struct rte_crypto_op *crypto_op;
+ unsigned int i, n;
+ uint16_t qp_id, len, ret;
+ uint8_t cdev_id;
+
+ len = 0;
+ ret = 0;
+ n = 0;
+ stats->event_deq_count += cnt;
+
+ for (i = 0; i < cnt; i++) {
+ crypto_op = ev[i].event_ptr;
+ if (crypto_op == NULL)
+ continue;
+ if (crypto_op->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
+ m_data = rte_cryptodev_sym_session_get_user_data(
+ crypto_op->sym->session);
+ if (m_data == NULL) {
+ rte_pktmbuf_free(crypto_op->sym->m_src);
+ rte_crypto_op_free(crypto_op);
+ continue;
+ }
+
+ cdev_id = m_data->request_info.cdev_id;
+ qp_id = m_data->request_info.queue_pair_id;
+ qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id];
+ if (qp_info == NULL) {
+ rte_pktmbuf_free(crypto_op->sym->m_src);
+ rte_crypto_op_free(crypto_op);
+ continue;
+ }
+ len = qp_info->len;
+ qp_info->op_buffer[len] = crypto_op;
+ len++;
+ } else if (crypto_op->sess_type == RTE_CRYPTO_OP_SESSIONLESS &&
+ crypto_op->private_data_offset) {
+ m_data = (union rte_event_crypto_metadata *)
+ ((uint8_t *)crypto_op +
+ crypto_op->private_data_offset);
+ cdev_id = m_data->request_info.cdev_id;
+ qp_id = m_data->request_info.queue_pair_id;
+ qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id];
+ if (qp_info == NULL) {
+ rte_pktmbuf_free(crypto_op->sym->m_src);
+ rte_crypto_op_free(crypto_op);
+ continue;
+ }
+ len = qp_info->len;
+ qp_info->op_buffer[len] = crypto_op;
+ len++;
+ } else {
+ rte_pktmbuf_free(crypto_op->sym->m_src);
+ rte_crypto_op_free(crypto_op);
+ continue;
+ }
+
+ if (len == BATCH_SIZE) {
+ struct rte_crypto_op **op_buffer = qp_info->op_buffer;
+ ret = rte_cryptodev_enqueue_burst(cdev_id,
+ qp_id,
+ op_buffer,
+ BATCH_SIZE);
+
+ stats->crypto_enq_count += ret;
+
+ while (ret < len) {
+ struct rte_crypto_op *op;
+ op = op_buffer[ret++];
+ stats->crypto_enq_fail++;
+ rte_pktmbuf_free(op->sym->m_src);
+ rte_crypto_op_free(op);
+ }
+
+ len = 0;
+ }
+
+ if (qp_info)
+ qp_info->len = len;
+ n += ret;
+ }
+
+ return n;
+}
+
+static unsigned int
+eca_crypto_enq_flush(struct rte_event_crypto_adapter *adapter)
+{
+ struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats;
+ struct crypto_device_info *curr_dev;
+ struct crypto_queue_pair_info *curr_queue;
+ struct rte_crypto_op **op_buffer;
+ struct rte_cryptodev *dev;
+ uint8_t cdev_id;
+ uint16_t qp;
+ uint16_t ret;
+ uint16_t num_cdev = rte_cryptodev_count();
+
+ ret = 0;
+ for (cdev_id = 0; cdev_id < num_cdev; cdev_id++) {
+ curr_dev = &adapter->cdevs[cdev_id];
+ if (curr_dev == NULL)
+ continue;
+ dev = curr_dev->dev;
+
+ for (qp = 0; qp < dev->data->nb_queue_pairs; qp++) {
+
+ curr_queue = &curr_dev->qpairs[qp];
+ if (!curr_queue->qp_enabled)
+ continue;
+
+ op_buffer = curr_queue->op_buffer;
+ ret = rte_cryptodev_enqueue_burst(cdev_id,
+ qp,
+ op_buffer,
+ curr_queue->len);
+ stats->crypto_enq_count += ret;
+
+ while (ret < curr_queue->len) {
+ struct rte_crypto_op *op;
+ op = op_buffer[ret++];
+ stats->crypto_enq_fail++;
+ rte_pktmbuf_free(op->sym->m_src);
+ rte_crypto_op_free(op);
+ }
+ curr_queue->len = 0;
+ }
+ }
+
+ return ret;
+}
+
+static int
+eca_crypto_adapter_enq_run(struct rte_event_crypto_adapter *adapter,
+ unsigned int max_enq)
+{
+ struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats;
+ struct rte_event ev[BATCH_SIZE];
+ unsigned int nb_enq, nb_enqueued;
+ uint16_t n;
+ uint8_t event_dev_id = adapter->eventdev_id;
+ uint8_t event_port_id = adapter->event_port_id;
+
+ nb_enqueued = 0;
+ if (adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)
+ return 0;
+
+ for (nb_enq = 0; nb_enq < max_enq; nb_enq += n) {
+ stats->event_poll_count++;
+ n = rte_event_dequeue_burst(event_dev_id,
+ event_port_id, ev, BATCH_SIZE, 0);
+
+ if (!n)
+ break;
+
+ nb_enqueued += eca_enq_to_cryptodev(adapter, ev, n);
+ }
+
+ if ((++adapter->transmit_loop_count &
+ (CRYPTO_ENQ_FLUSH_THRESHOLD - 1)) == 0) {
+ nb_enqueued += eca_crypto_enq_flush(adapter);
+ }
+
+ return nb_enqueued;
+}
+
+static inline void
+eca_ops_enqueue_burst(struct rte_event_crypto_adapter *adapter,
+ struct rte_crypto_op **ops, uint16_t num)
+{
+ struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats;
+ union rte_event_crypto_metadata *m_data = NULL;
+ uint8_t event_dev_id = adapter->eventdev_id;
+ uint8_t event_port_id = adapter->event_port_id;
+ struct rte_event events[BATCH_SIZE];
+ uint16_t nb_enqueued, nb_ev;
+ uint8_t retry;
+ uint8_t i;
+
+ nb_ev = 0;
+ retry = 0;
+ nb_enqueued = 0;
+ num = RTE_MIN(num, BATCH_SIZE);
+ for (i = 0; i < num; i++) {
+ struct rte_event *ev = &events[nb_ev++];
+ if (ops[i]->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
+ m_data = rte_cryptodev_sym_session_get_user_data(
+ ops[i]->sym->session);
+ } else if (ops[i]->sess_type == RTE_CRYPTO_OP_SESSIONLESS &&
+ ops[i]->private_data_offset) {
+ m_data = (union rte_event_crypto_metadata *)
+ ((uint8_t *)ops[i] +
+ ops[i]->private_data_offset);
+ }
+
+ if (unlikely(m_data == NULL)) {
+ rte_pktmbuf_free(ops[i]->sym->m_src);
+ rte_crypto_op_free(ops[i]);
+ continue;
+ }
+
+ rte_memcpy(ev, &m_data->response_info, sizeof(*ev));
+ ev->event_ptr = ops[i];
+ ev->event_type = RTE_EVENT_TYPE_CRYPTODEV;
+ if (adapter->implicit_release_disabled)
+ ev->op = RTE_EVENT_OP_FORWARD;
+ else
+ ev->op = RTE_EVENT_OP_NEW;
+ }
+
+ do {
+ nb_enqueued += rte_event_enqueue_burst(event_dev_id,
+ event_port_id,
+ &events[nb_enqueued],
+ nb_ev - nb_enqueued);
+ } while (retry++ < CRYPTO_ADAPTER_MAX_EV_ENQ_RETRIES &&
+ nb_enqueued < nb_ev);
+
+ /* Free mbufs and rte_crypto_ops for failed events */
+ for (i = nb_enqueued; i < nb_ev; i++) {
+ struct rte_crypto_op *op = events[i].event_ptr;
+ rte_pktmbuf_free(op->sym->m_src);
+ rte_crypto_op_free(op);
+ }
+
+ stats->event_enq_fail_count += nb_ev - nb_enqueued;
+ stats->event_enq_count += nb_enqueued;
+ stats->event_enq_retry_count += retry - 1;
+}
+
+static inline unsigned int
+eca_crypto_adapter_deq_run(struct rte_event_crypto_adapter *adapter,
+ unsigned int max_deq)
+{
+ struct rte_event_crypto_adapter_stats *stats = &adapter->crypto_stats;
+ struct crypto_device_info *curr_dev;
+ struct crypto_queue_pair_info *curr_queue;
+ struct rte_crypto_op *ops[BATCH_SIZE];
+ uint16_t n, nb_deq;
+ struct rte_cryptodev *dev;
+ uint8_t cdev_id;
+ uint16_t qp, dev_qps;
+ bool done;
+ uint16_t num_cdev = rte_cryptodev_count();
+
+ nb_deq = 0;
+ do {
+ uint16_t queues = 0;
+ done = true;
+
+ for (cdev_id = adapter->next_cdev_id;
+ cdev_id < num_cdev; cdev_id++) {
+ curr_dev = &adapter->cdevs[cdev_id];
+ if (curr_dev == NULL)
+ continue;
+ dev = curr_dev->dev;
+ dev_qps = dev->data->nb_queue_pairs;
+
+ for (qp = curr_dev->next_queue_pair_id;
+ queues < dev_qps; qp = (qp + 1) % dev_qps,
+ queues++) {
+
+ curr_queue = &curr_dev->qpairs[qp];
+ if (!curr_queue->qp_enabled)
+ continue;
+
+ n = rte_cryptodev_dequeue_burst(cdev_id, qp,
+ ops, BATCH_SIZE);
+ if (!n)
+ continue;
+
+ done = false;
+ stats->crypto_deq_count += n;
+ eca_ops_enqueue_burst(adapter, ops, n);
+ nb_deq += n;
+
+ if (nb_deq > max_deq) {
+ if ((qp + 1) == dev_qps) {
+ adapter->next_cdev_id =
+ (cdev_id + 1)
+ % num_cdev;
+ }
+ curr_dev->next_queue_pair_id = (qp + 1)
+ % dev->data->nb_queue_pairs;
+
+ return nb_deq;
+ }
+ }
+ }
+ } while (done == false);
+ return nb_deq;
+}
+
+static void
+eca_crypto_adapter_run(struct rte_event_crypto_adapter *adapter,
+ unsigned int max_ops)
+{
+ while (max_ops) {
+ unsigned int e_cnt, d_cnt;
+
+ e_cnt = eca_crypto_adapter_deq_run(adapter, max_ops);
+ max_ops -= RTE_MIN(max_ops, e_cnt);
+
+ d_cnt = eca_crypto_adapter_enq_run(adapter, max_ops);
+ max_ops -= RTE_MIN(max_ops, d_cnt);
+
+ if (e_cnt == 0 && d_cnt == 0)
+ break;
+
+ }
+}
+
+static int
+eca_service_func(void *args)
+{
+ struct rte_event_crypto_adapter *adapter = args;
+
+ if (rte_spinlock_trylock(&adapter->lock) == 0)
+ return 0;
+ eca_crypto_adapter_run(adapter, adapter->max_nb);
+ rte_spinlock_unlock(&adapter->lock);
+
+ return 0;
+}
+
+static int
+eca_init_service(struct rte_event_crypto_adapter *adapter, uint8_t id)
+{
+ struct rte_event_crypto_adapter_conf adapter_conf;
+ struct rte_service_spec service;
+ int ret;
+
+ if (adapter->service_inited)
+ return 0;
+
+ memset(&service, 0, sizeof(service));
+ snprintf(service.name, CRYPTO_ADAPTER_NAME_LEN,
+ "rte_event_crypto_adapter_%d", id);
+ service.socket_id = adapter->socket_id;
+ service.callback = eca_service_func;
+ service.callback_userdata = adapter;
+ /* Service function handles locking for queue add/del updates */
+ service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
+ ret = rte_service_component_register(&service, &adapter->service_id);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to register service %s err = %" PRId32,
+ service.name, ret);
+ return ret;
+ }
+
+ ret = adapter->conf_cb(id, adapter->eventdev_id,
+ &adapter_conf, adapter->conf_arg);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("configuration callback failed err = %" PRId32,
+ ret);
+ return ret;
+ }
+
+ adapter->max_nb = adapter_conf.max_nb;
+ adapter->event_port_id = adapter_conf.event_port_id;
+ adapter->service_inited = 1;
+
+ return ret;
+}
+
+static void
+eca_update_qp_info(struct rte_event_crypto_adapter *adapter,
+ struct crypto_device_info *dev_info,
+ int32_t queue_pair_id,
+ uint8_t add)
+{
+ struct crypto_queue_pair_info *qp_info;
+ int enabled;
+ uint16_t i;
+
+ if (dev_info->qpairs == NULL)
+ return;
+
+ if (queue_pair_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_queue_pairs; i++)
+ eca_update_qp_info(adapter, dev_info, i, add);
+ } else {
+ qp_info = &dev_info->qpairs[queue_pair_id];
+ enabled = qp_info->qp_enabled;
+ if (add) {
+ adapter->nb_qps += !enabled;
+ dev_info->num_qpairs += !enabled;
+ } else {
+ adapter->nb_qps -= enabled;
+ dev_info->num_qpairs -= enabled;
+ }
+ qp_info->qp_enabled = !!add;
+ }
+}
+
+static int
+eca_add_queue_pair(struct rte_event_crypto_adapter *adapter,
+ uint8_t cdev_id,
+ int queue_pair_id)
+{
+ struct crypto_device_info *dev_info = &adapter->cdevs[cdev_id];
+ struct crypto_queue_pair_info *qpairs;
+ uint32_t i;
+
+ if (dev_info->qpairs == NULL) {
+ dev_info->qpairs =
+ rte_zmalloc_socket(adapter->mem_name,
+ dev_info->dev->data->nb_queue_pairs *
+ sizeof(struct crypto_queue_pair_info),
+ 0, adapter->socket_id);
+ if (dev_info->qpairs == NULL)
+ return -ENOMEM;
+
+ qpairs = dev_info->qpairs;
+ qpairs->op_buffer = rte_zmalloc_socket(adapter->mem_name,
+ BATCH_SIZE *
+ sizeof(struct rte_crypto_op *),
+ 0, adapter->socket_id);
+ if (!qpairs->op_buffer) {
+ rte_free(qpairs);
+ return -ENOMEM;
+ }
+ }
+
+ if (queue_pair_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_queue_pairs; i++)
+ eca_update_qp_info(adapter, dev_info, i, 1);
+ } else
+ eca_update_qp_info(adapter, dev_info,
+ (uint16_t)queue_pair_id, 1);
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_queue_pair_add(uint8_t id,
+ uint8_t cdev_id,
+ int32_t queue_pair_id,
+ const struct rte_event *event)
+{
+ struct rte_event_crypto_adapter *adapter;
+ struct rte_eventdev *dev;
+ struct crypto_device_info *dev_info;
+ uint32_t cap;
+ int ret;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ if (!rte_cryptodev_pmd_is_valid_dev(cdev_id)) {
+ RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id);
+ return -EINVAL;
+ }
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+ ret = rte_event_crypto_adapter_caps_get(adapter->eventdev_id,
+ cdev_id,
+ &cap);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("Failed to get adapter caps dev %" PRIu8
+ " cdev %" PRIu8, id, cdev_id);
+ return ret;
+ }
+
+ if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) &&
+ (event == NULL)) {
+ RTE_EDEV_LOG_ERR("Conf value can not be NULL for dev_id=%u",
+ cdev_id);
+ return -EINVAL;
+ }
+
+ dev_info = &adapter->cdevs[cdev_id];
+
+ if (queue_pair_id != -1 &&
+ (uint16_t)queue_pair_id >= dev_info->dev->data->nb_queue_pairs) {
+ RTE_EDEV_LOG_ERR("Invalid queue_pair_id %" PRIu16,
+ (uint16_t)queue_pair_id);
+ return -EINVAL;
+ }
+
+	/* In case the HW cap is RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD,
+	 * no service core is needed because the HW supports the event forward
+	 * capability.
+	 */
+ if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) ||
+ (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND &&
+ adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW) ||
+ (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW &&
+ adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)) {
+ RTE_FUNC_PTR_OR_ERR_RET(
+ *dev->dev_ops->crypto_adapter_queue_pair_add,
+ -ENOTSUP);
+ if (dev_info->qpairs == NULL) {
+ dev_info->qpairs =
+ rte_zmalloc_socket(adapter->mem_name,
+ dev_info->dev->data->nb_queue_pairs *
+ sizeof(struct crypto_queue_pair_info),
+ 0, adapter->socket_id);
+ if (dev_info->qpairs == NULL)
+ return -ENOMEM;
+ }
+
+ ret = (*dev->dev_ops->crypto_adapter_queue_pair_add)(dev,
+ dev_info->dev,
+ queue_pair_id,
+ event);
+ if (ret)
+ return ret;
+
+ else
+ eca_update_qp_info(adapter, &adapter->cdevs[cdev_id],
+ queue_pair_id, 1);
+ }
+
+	/* In case the HW cap is RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW,
+	 * or this is a SW adapter, initiate the service so the application can
+	 * choose whichever way it wants to use the adapter.
+	 * Case 1: RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW
+	 * The application may want to use one of the two modes below:
+	 * a. OP_FORWARD mode -> HW dequeue + SW enqueue
+	 * b. OP_NEW mode -> HW dequeue
+	 * Case 2: No HW caps, use the SW adapter
+	 * a. OP_FORWARD mode -> SW enqueue & dequeue
+	 * b. OP_NEW mode -> SW dequeue
+	 */
+ if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW &&
+ adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD) ||
+ (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW) &&
+ !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) &&
+ !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) &&
+ (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA))) {
+ rte_spinlock_lock(&adapter->lock);
+ ret = eca_init_service(adapter, id);
+ if (ret == 0)
+ ret = eca_add_queue_pair(adapter, cdev_id,
+ queue_pair_id);
+ rte_spinlock_unlock(&adapter->lock);
+
+ if (ret)
+ return ret;
+
+ rte_service_component_runstate_set(adapter->service_id, 1);
+ }
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_queue_pair_del(uint8_t id, uint8_t cdev_id,
+ int32_t queue_pair_id)
+{
+ struct rte_event_crypto_adapter *adapter;
+ struct crypto_device_info *dev_info;
+ struct rte_eventdev *dev;
+ int ret;
+ uint32_t cap;
+ uint16_t i;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ if (!rte_cryptodev_pmd_is_valid_dev(cdev_id)) {
+ RTE_EDEV_LOG_ERR("Invalid dev_id=%" PRIu8, cdev_id);
+ return -EINVAL;
+ }
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+ ret = rte_event_crypto_adapter_caps_get(adapter->eventdev_id,
+ cdev_id,
+ &cap);
+ if (ret)
+ return ret;
+
+ dev_info = &adapter->cdevs[cdev_id];
+
+ if (queue_pair_id != -1 &&
+ (uint16_t)queue_pair_id >= dev_info->dev->data->nb_queue_pairs) {
+ RTE_EDEV_LOG_ERR("Invalid queue_pair_id %" PRIu16,
+ (uint16_t)queue_pair_id);
+ return -EINVAL;
+ }
+
+ if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) ||
+ (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW &&
+ adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)) {
+ RTE_FUNC_PTR_OR_ERR_RET(
+ *dev->dev_ops->crypto_adapter_queue_pair_del,
+ -ENOTSUP);
+ ret = (*dev->dev_ops->crypto_adapter_queue_pair_del)(dev,
+ dev_info->dev,
+ queue_pair_id);
+ if (ret == 0) {
+ eca_update_qp_info(adapter,
+ &adapter->cdevs[cdev_id],
+ queue_pair_id,
+ 0);
+ if (dev_info->num_qpairs == 0) {
+ rte_free(dev_info->qpairs);
+ dev_info->qpairs = NULL;
+ }
+ }
+ } else {
+ if (adapter->nb_qps == 0)
+ return 0;
+
+ rte_spinlock_lock(&adapter->lock);
+ if (queue_pair_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_queue_pairs;
+ i++)
+ eca_update_qp_info(adapter, dev_info,
+ queue_pair_id, 0);
+ } else {
+ eca_update_qp_info(adapter, dev_info,
+ (uint16_t)queue_pair_id, 0);
+ }
+
+ if (dev_info->num_qpairs == 0) {
+ rte_free(dev_info->qpairs);
+ dev_info->qpairs = NULL;
+ }
+
+ rte_spinlock_unlock(&adapter->lock);
+ rte_service_component_runstate_set(adapter->service_id,
+ adapter->nb_qps);
+ }
+
+ return ret;
+}
+
+static int
+eca_adapter_ctrl(uint8_t id, int start)
+{
+ struct rte_event_crypto_adapter *adapter;
+ struct crypto_device_info *dev_info;
+ struct rte_eventdev *dev;
+ uint32_t i;
+ int use_service;
+ int stop = !start;
+
+ use_service = 0;
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+
+ for (i = 0; i < rte_cryptodev_count(); i++) {
+ dev_info = &adapter->cdevs[i];
+		/* if starting, check the number of queue pairs added */
+ if (start && !dev_info->num_qpairs)
+ continue;
+		/* if stopping, check whether the device has been started */
+ if (stop && !dev_info->dev_started)
+ continue;
+ use_service |= !dev_info->internal_event_port;
+ dev_info->dev_started = start;
+ if (dev_info->internal_event_port == 0)
+ continue;
+		start ? (*dev->dev_ops->crypto_adapter_start)(dev,
+						dev_info->dev) :
+			(*dev->dev_ops->crypto_adapter_stop)(dev,
+						dev_info->dev);
+ }
+
+ if (use_service)
+ rte_service_runstate_set(adapter->service_id, start);
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_start(uint8_t id)
+{
+ struct rte_event_crypto_adapter *adapter;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ return eca_adapter_ctrl(id, 1);
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_stop(uint8_t id)
+{
+ return eca_adapter_ctrl(id, 0);
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_stats_get(uint8_t id,
+ struct rte_event_crypto_adapter_stats *stats)
+{
+ struct rte_event_crypto_adapter *adapter;
+ struct rte_event_crypto_adapter_stats dev_stats_sum = { 0 };
+ struct rte_event_crypto_adapter_stats dev_stats;
+ struct rte_eventdev *dev;
+ struct crypto_device_info *dev_info;
+ uint32_t i;
+ int ret;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL || stats == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+ memset(stats, 0, sizeof(*stats));
+ for (i = 0; i < rte_cryptodev_count(); i++) {
+ dev_info = &adapter->cdevs[i];
+ if (dev_info->internal_event_port == 0 ||
+ dev->dev_ops->crypto_adapter_stats_get == NULL)
+ continue;
+ ret = (*dev->dev_ops->crypto_adapter_stats_get)(dev,
+ dev_info->dev,
+ &dev_stats);
+ if (ret)
+ continue;
+
+ dev_stats_sum.crypto_deq_count += dev_stats.crypto_deq_count;
+ dev_stats_sum.event_enq_count +=
+ dev_stats.event_enq_count;
+ }
+
+ if (adapter->service_inited)
+ *stats = adapter->crypto_stats;
+
+ stats->crypto_deq_count += dev_stats_sum.crypto_deq_count;
+ stats->event_enq_count += dev_stats_sum.event_enq_count;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_stats_reset(uint8_t id)
+{
+ struct rte_event_crypto_adapter *adapter;
+ struct crypto_device_info *dev_info;
+ struct rte_eventdev *dev;
+ uint32_t i;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[adapter->eventdev_id];
+ for (i = 0; i < rte_cryptodev_count(); i++) {
+ dev_info = &adapter->cdevs[i];
+ if (dev_info->internal_event_port == 0 ||
+ dev->dev_ops->crypto_adapter_stats_reset == NULL)
+ continue;
+ (*dev->dev_ops->crypto_adapter_stats_reset)(dev,
+ dev_info->dev);
+ }
+
+ memset(&adapter->crypto_stats, 0, sizeof(adapter->crypto_stats));
+ return 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_service_id_get(uint8_t id, uint32_t *service_id)
+{
+ struct rte_event_crypto_adapter *adapter;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL || service_id == NULL)
+ return -EINVAL;
+
+ if (adapter->service_inited)
+ *service_id = adapter->service_id;
+
+ return adapter->service_inited ? 0 : -ESRCH;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_event_port_get(uint8_t id, uint8_t *event_port_id)
+{
+ struct rte_event_crypto_adapter *adapter;
+
+ EVENT_CRYPTO_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ adapter = eca_id_to_adapter(id);
+ if (adapter == NULL || event_port_id == NULL)
+ return -EINVAL;
+
+ *event_port_id = adapter->event_port_id;
+
+ return 0;
+}
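
The control path implemented above (create, queue pair add, service mapping, start) can be exercised with a handful of calls. The following is a minimal sketch, not part of this patch: it assumes the eventdev and cryptodev are already configured, and the adapter/eventdev/cryptodev IDs, the port configuration values, and the service lcore number are illustrative assumptions.

```c
#include <rte_event_crypto_adapter.h>
#include <rte_service.h>

/* Sketch: set up crypto adapter 0 on eventdev 0 / cryptodev 0 (assumed IDs). */
static int
setup_crypto_adapter_sketch(void)
{
	const uint8_t id = 0, evdev_id = 0, cdev_id = 0;
	struct rte_event_port_conf port_conf = {
		.new_event_threshold = 4096,	/* illustrative values */
		.dequeue_depth = 32,
		.enqueue_depth = 32,
	};
	uint32_t service_id;
	int ret;

	/* Create the adapter; the default conf_cb adds an extra event port */
	ret = rte_event_crypto_adapter_create(id, evdev_id, &port_conf,
			RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD);
	if (ret)
		return ret;

	/* Add all pre-configured queue pairs of the cryptodev */
	ret = rte_event_crypto_adapter_queue_pair_add(id, cdev_id, -1, NULL);
	if (ret)
		return ret;

	/* For the SW transfer path, map the adapter service to an lcore;
	 * lcore 2 is an assumption.
	 */
	if (rte_event_crypto_adapter_service_id_get(id, &service_id) == 0) {
		rte_service_lcore_add(2);
		rte_service_map_lcore_set(service_id, 2, 1);
		rte_service_lcore_start(2);
	}

	return rte_event_crypto_adapter_start(id);
}
```

The service runstate itself is toggled internally by rte_event_crypto_adapter_start()/stop(); the explicit lcore mapping above is only relevant when the SW transfer path (EAL service) is used.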
diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.h b/lib/librte_eventdev/rte_event_crypto_adapter.h
new file mode 100644
index 00000000..d367309c
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_crypto_adapter.h
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _RTE_EVENT_CRYPTO_ADAPTER_
+#define _RTE_EVENT_CRYPTO_ADAPTER_
+
+/**
+ * @file
+ *
+ * RTE Event crypto adapter
+ *
+ * The eventdev library provides a couple of adapters that bridge between
+ * various components and act as new event sources. The event crypto adapter
+ * is one of those adapters; it bridges event devices and crypto devices.
+ *
+ * The crypto adapter adds support to enqueue and dequeue crypto operations
+ * to and from the event device. The packet flow between the crypto device
+ * and the event device can be accomplished using both SW and HW based
+ * transfer mechanisms. The adapter uses an EAL service core function for
+ * SW based packet transfer and uses the eventdev PMD functions to configure
+ * HW based packet transfer between the crypto device and the event device.
+ *
+ * The application can choose to submit a crypto operation directly to a
+ * crypto device or send it to the crypto adapter via the eventdev, based on
+ * the RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability.
+ * The first mode is known as the event new (RTE_EVENT_CRYPTO_ADAPTER_OP_NEW)
+ * mode and the second as the event forward
+ * (RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD) mode. The mode is specified when the
+ * adapter is created. In the former mode, it is the application's
+ * responsibility to enable ingress packet ordering; in the latter mode, the
+ * adapter maintains ingress packet ordering.
+ *
+ *
+ * Working model of RTE_EVENT_CRYPTO_ADAPTER_OP_NEW mode:
+ *
+ * +--------------+ +--------------+
+ * | | | Crypto stage |
+ * | Application |---[2]-->| + enqueue to |
+ * | | | cryptodev |
+ * +--------------+ +--------------+
+ * ^ ^ |
+ * | | [3]
+ * [6] [1] |
+ * | | |
+ * +--------------+ |
+ * | | |
+ * | Event device | |
+ * | | |
+ * +--------------+ |
+ * ^ |
+ * | |
+ * [5] |
+ * | v
+ * +--------------+ +--------------+
+ * | | | |
+ * |Crypto adapter|<--[4]---| Cryptodev |
+ * | | | |
+ * +--------------+ +--------------+
+ *
+ *
+ * [1] Application dequeues events from the previous stage.
+ * [2] Application prepares the crypto operations.
+ * [3] Crypto operations are submitted to cryptodev by application.
+ * [4] Crypto adapter dequeues crypto completions from cryptodev.
+ * [5] Crypto adapter enqueues events to the eventdev.
+ * [6] Application dequeues from eventdev and prepare for further
+ * processing.
+ *
+ * In the RTE_EVENT_CRYPTO_ADAPTER_OP_NEW mode, the application submits crypto
+ * operations directly to the crypto device. The adapter then dequeues crypto
+ * completions from the crypto device and enqueues events to the event device.
+ * This mode does not ensure ingress ordering if the application directly
+ * enqueues to the cryptodev without going through a crypto/atomic stage,
+ * i.e. without items [1] and [2].
+ * Events dequeued from the adapter will be treated as new events.
+ * In this mode, the application needs to specify the event information
+ * (response information) that is needed to enqueue an event after the crypto
+ * operation is completed.
+ *
+ *
+ * Working model of RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode:
+ *
+ * +--------------+ +--------------+
+ * --[1]-->| |---[2]-->| Application |
+ * | Event device | | in |
+ * <--[8]--| |<--[3]---| Ordered stage|
+ * +--------------+ +--------------+
+ * ^ |
+ * | [4]
+ * [7] |
+ * | v
+ * +----------------+ +--------------+
+ * | |--[5]->| |
+ * | Crypto adapter | | Cryptodev |
+ * | |<-[6]--| |
+ * +----------------+ +--------------+
+ *
+ *
+ * [1] Events from the previous stage.
+ * [2] Application in ordered stage dequeues events from eventdev.
+ * [3] Application enqueues crypto operations as events to eventdev.
+ * [4] Crypto adapter dequeues event from eventdev.
+ * [5] Crypto adapter submits crypto operations to cryptodev
+ * (Atomic stage).
+ * [6] Crypto adapter dequeues crypto completions from cryptodev
+ * [7] Crypto adapter enqueues events to the eventdev
+ * [8] Events to the next stage
+ *
+ * In the RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode, if the HW supports the
+ * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability, the
+ * application can submit the crypto operations directly to the cryptodev.
+ * If not, the application retrieves the crypto adapter's event port using
+ * the rte_event_crypto_adapter_event_port_get() API, links its event
+ * queue to this port, and starts enqueuing crypto operations as events
+ * to the eventdev. The adapter then dequeues the events and submits the
+ * crypto operations to the cryptodev. After the crypto operations complete,
+ * the adapter enqueues events to the event device.
+ * The application can use this mode when ingress packet ordering is needed.
+ * Events dequeued from the adapter will be treated as forwarded events.
+ * In this mode, the application needs to specify the cryptodev ID
+ * and queue pair ID (request information) needed to enqueue a crypto
+ * operation, in addition to the event information (response information)
+ * needed to enqueue an event after the crypto operation has completed.
+ *
+ *
+ * The event crypto adapter provides common APIs to configure the packet flow
+ * from the crypto device to event devices for both SW and HW based transfers.
+ * The crypto event adapter's functions are:
+ * - rte_event_crypto_adapter_create_ext()
+ * - rte_event_crypto_adapter_create()
+ * - rte_event_crypto_adapter_free()
+ * - rte_event_crypto_adapter_queue_pair_add()
+ * - rte_event_crypto_adapter_queue_pair_del()
+ * - rte_event_crypto_adapter_start()
+ * - rte_event_crypto_adapter_stop()
+ * - rte_event_crypto_adapter_stats_get()
+ * - rte_event_crypto_adapter_stats_reset()
+ *
+ * The application creates an instance using rte_event_crypto_adapter_create()
+ * or rte_event_crypto_adapter_create_ext().
+ *
+ * Cryptodev queue pair addition/deletion is done using the
+ * rte_event_crypto_adapter_queue_pair_xxx() APIs. If the HW supports the
+ * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND capability, event
+ * information must be passed to the add API.
+ *
+ * The SW adapter or HW PMD uses rte_crypto_op::sess_type to decide whether
+ * request/response (private) data is located in the crypto/security session
+ * or at an offset in the rte_crypto_op.
+ *
+ * For session-based operations, the set and get APIs provide a mechanism for
+ * an application to store and retrieve this request/response information
+ * along with the crypto session.
+ * The RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA capability indicates
+ * whether HW or SW supports this feature.
+ *
+ * For the session-less mode, the adapter gets the private data information
+ * placed along with the ``struct rte_crypto_op``.
+ * The rte_crypto_op::private_data_offset provides an offset to locate the
+ * request/response information in the rte_crypto_op. This offset is counted
+ * from the start of the rte_crypto_op, including the initialization
+ * vector (IV).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include "rte_eventdev.h"
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this enum may change without prior notice
+ *
+ * Crypto event adapter mode
+ */
+enum rte_event_crypto_adapter_mode {
+ RTE_EVENT_CRYPTO_ADAPTER_OP_NEW,
+ /**< Start the crypto adapter in event new mode.
+ * @see RTE_EVENT_OP_NEW.
+	 * The application submits crypto operations to the cryptodev.
+	 * The adapter only dequeues the crypto completions from the
+	 * cryptodev and enqueues events to the eventdev.
+ */
+ RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD,
+ /**< Start the crypto adapter in event forward mode.
+ * @see RTE_EVENT_OP_FORWARD.
+	 * The application submits crypto requests as events to the crypto
+	 * adapter or the crypto device, based on the
+	 * RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD capability.
+	 * Crypto completions are enqueued back to the eventdev by the
+	 * crypto adapter.
+ */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Crypto event request structure, filled in by the application to
+ * provide crypto request information to the adapter.
+ */
+struct rte_event_crypto_request {
+ uint8_t resv[8];
+ /**< Overlaps with first 8 bytes of struct rte_event
+ * that encode the response event information. Application
+ * is expected to fill in struct rte_event response_info.
+ */
+ uint16_t cdev_id;
+ /**< cryptodev ID to be used */
+ uint16_t queue_pair_id;
+ /**< cryptodev queue pair ID to be used */
+ uint32_t resv1;
+ /**< Reserved bits */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Crypto event metadata structure, filled in by the application
+ * to provide crypto request and event response information.
+ *
+ * If crypto events are enqueued using a HW mechanism, the cryptodev
+ * PMD will use the event response information to set up the event
+ * that is enqueued back to the eventdev after completion of the crypto
+ * operation. If the transfer is done by SW, the event response
+ * information will be used by the adapter.
+ */
+union rte_event_crypto_metadata {
+ struct rte_event_crypto_request request_info;
+ /**< Request information to be filled in by application
+ * for RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode.
+ */
+ struct rte_event response_info;
+ /**< Response information to be filled in by application
+ * for RTE_EVENT_CRYPTO_ADAPTER_OP_NEW and
+ * RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode.
+ */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Adapter configuration structure that the adapter configuration callback
+ * function is expected to fill out
+ * @see rte_event_crypto_adapter_conf_cb
+ */
+struct rte_event_crypto_adapter_conf {
+ uint8_t event_port_id;
+	/**< Event port identifier; the adapter enqueues events to this
+ * port and dequeues crypto request events in
+ * RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode.
+ */
+ uint32_t max_nb;
+ /**< The adapter can return early if it has processed at least
+ * max_nb crypto ops. This isn't treated as a requirement; batching
+ * may cause the adapter to process more than max_nb crypto ops.
+ */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Function type used for the adapter configuration callback. The callback is
+ * used to fill in members of struct rte_event_crypto_adapter_conf; it is
+ * invoked when creating a SW service for packet transfer from a cryptodev
+ * queue pair to the event device. The SW service is created within the
+ * rte_event_crypto_adapter_queue_pair_add() function if SW based packet
+ * transfers from cryptodev queue pairs to the event device are required.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param dev_id
+ * Event device identifier.
+ *
+ * @param conf
+ * Structure that needs to be populated by this callback.
+ *
+ * @param arg
+ * Argument to the callback. This is the same as the conf_arg passed to the
+ * rte_event_crypto_adapter_create_ext().
+ */
+typedef int (*rte_event_crypto_adapter_conf_cb) (uint8_t id, uint8_t dev_id,
+ struct rte_event_crypto_adapter_conf *conf,
+ void *arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * A structure used to retrieve statistics for an event crypto adapter
+ * instance.
+ */
+
+struct rte_event_crypto_adapter_stats {
+ uint64_t event_poll_count;
+ /**< Event port poll count */
+ uint64_t event_deq_count;
+ /**< Event dequeue count */
+ uint64_t crypto_enq_count;
+ /**< Cryptodev enqueue count */
+ uint64_t crypto_enq_fail;
+ /**< Cryptodev enqueue failed count */
+ uint64_t crypto_deq_count;
+ /**< Cryptodev dequeue count */
+ uint64_t event_enq_count;
+ /**< Event enqueue count */
+ uint64_t event_enq_retry_count;
+ /**< Event enqueue retry count */
+ uint64_t event_enq_fail_count;
+ /**< Event enqueue fail count */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new event crypto adapter with the specified identifier.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param dev_id
+ * Event device identifier.
+ *
+ * @param conf_cb
+ * Callback function that fills in members of a
+ * struct rte_event_crypto_adapter_conf struct passed into
+ * it.
+ *
+ * @param mode
+ * Flag to indicate the mode of the adapter.
+ * @see rte_event_crypto_adapter_mode
+ *
+ * @param conf_arg
+ * Argument that is passed to the conf_cb function.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure
+ */
+int __rte_experimental
+rte_event_crypto_adapter_create_ext(uint8_t id, uint8_t dev_id,
+ rte_event_crypto_adapter_conf_cb conf_cb,
+ enum rte_event_crypto_adapter_mode mode,
+ void *conf_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new event crypto adapter with the specified identifier.
+ * This function uses an internal configuration function that creates an event
+ * port. This default function reconfigures the event device with an
+ * additional event port and sets up the event port using the port_config
+ * parameter passed into this function. In case the application needs more
+ * control over the configuration of the service, it should use the
+ * rte_event_crypto_adapter_create_ext() version.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param dev_id
+ * Event device identifier.
+ *
+ * @param port_config
+ * Argument of type *rte_event_port_conf* that is passed to the conf_cb
+ * function.
+ *
+ * @param mode
+ * Flag to indicate the mode of the adapter.
+ * @see rte_event_crypto_adapter_mode
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure
+ */
+int __rte_experimental
+rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id,
+ struct rte_event_port_conf *port_config,
+ enum rte_event_crypto_adapter_mode mode);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free an event crypto adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success
+ *   - <0: Error code on failure; if the adapter still has queue pairs
+ *     added to it, the function returns -EBUSY.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_free(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add a queue pair to an event crypto adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param cdev_id
+ * Cryptodev identifier.
+ *
+ * @param queue_pair_id
+ *  Cryptodev queue pair identifier. If queue_pair_id is set to -1,
+ *  the adapter adds all the pre-configured queue pairs to the instance.
+ *
+ * @param event
+ *  If HW supports cryptodev queue pair to event queue binding, the
+ *  application is expected to fill in event information; otherwise it
+ *  can be NULL.
+ * @see RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND
+ *
+ * @return
+ * - 0: Success, queue pair added correctly.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_queue_pair_add(uint8_t id,
+ uint8_t cdev_id,
+ int32_t queue_pair_id,
+ const struct rte_event *event);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Delete a queue pair from an event crypto adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param cdev_id
+ * Cryptodev identifier.
+ *
+ * @param queue_pair_id
+ * Cryptodev queue pair identifier.
+ *
+ * @return
+ * - 0: Success, queue pair deleted successfully.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_queue_pair_del(uint8_t id, uint8_t cdev_id,
+ int32_t queue_pair_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start event crypto adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ *
+ * @return
+ * - 0: Success, adapter started successfully.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_start(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop event crypto adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success, adapter stopped successfully.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_stop(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve statistics for an adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param [out] stats
+ * A pointer to structure used to retrieve statistics for an adapter.
+ *
+ * @return
+ * - 0: Success, retrieved successfully.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_stats_get(uint8_t id,
+ struct rte_event_crypto_adapter_stats *stats);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset statistics for an adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success, statistics reset successfully.
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_stats_reset(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the service ID of an adapter. If the adapter doesn't use
+ * a rte_service function, this function returns -ESRCH.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param [out] service_id
+ * A pointer to a uint32_t, to be filled in with the service id.
+ *
+ * @return
+ * - 0: Success
+ *   - <0: Error code on failure; if the adapter doesn't use a rte_service
+ *     function, this function returns -ESRCH.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_service_id_get(uint8_t id, uint32_t *service_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the event port of an adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param [out] event_port_id
+ *  A pointer to a uint8_t, to be filled in with the adapter's event port.
+ *  The application links its event queue to this port, which is used in
+ *  RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD mode.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_crypto_adapter_event_port_get(uint8_t id, uint8_t *event_port_id);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _RTE_EVENT_CRYPTO_ADAPTER_ */
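
As an illustration of the metadata union declared above (not part of this patch), the following sketch attaches request and response information to a symmetric crypto session for OP_FORWARD mode. The session pointer, event queue ID, flow ID, and cryptodev/queue pair IDs are assumptions, and the RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA capability is assumed to be supported.

```c
#include <string.h>
#include <rte_cryptodev.h>
#include <rte_event_crypto_adapter.h>

/* Sketch: attach adapter metadata to a symmetric session (OP_FORWARD mode).
 * All IDs below are assumptions chosen for illustration.
 */
static int
attach_crypto_adapter_metadata(struct rte_cryptodev_sym_session *sess)
{
	union rte_event_crypto_metadata m_data;

	memset(&m_data, 0, sizeof(m_data));

	/* Response event pushed to the eventdev once the crypto op completes */
	m_data.response_info.queue_id = 1;	/* assumed event queue */
	m_data.response_info.sched_type = RTE_SCHED_TYPE_ATOMIC;
	m_data.response_info.flow_id = 0xdead;	/* assumed flow ID */

	/* Request side: cryptodev/queue pair the adapter should use. These
	 * fields sit past the 8-byte response event word (see resv[8] in
	 * struct rte_event_crypto_request), so both views can be filled.
	 */
	m_data.request_info.cdev_id = 0;
	m_data.request_info.queue_pair_id = 0;

	return rte_cryptodev_sym_session_set_user_data(sess, &m_data,
			sizeof(m_data));
}
```

The adapter (or the HW PMD) later reads this data back with rte_cryptodev_sym_session_get_user_data(), as done in eca_enq_to_cryptodev() and eca_ops_enqueue_burst() above.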
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 9aece9f8..f5e5a0b5 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -1,3 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation.
+ * All rights reserved.
+ */
+#if defined(LINUX)
+#include <sys/epoll.h>
+#endif
+#include <unistd.h>
+
#include <rte_cycles.h>
#include <rte_common.h>
#include <rte_dev.h>
@@ -7,6 +16,7 @@
#include <rte_malloc.h>
#include <rte_service_component.h>
#include <rte_thash.h>
+#include <rte_interrupts.h>
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
@@ -20,6 +30,22 @@
#define ETH_RX_ADAPTER_MEM_NAME_LEN 32
#define RSS_KEY_SIZE 40
+/* value written to intr thread pipe to signal thread exit */
+#define ETH_BRIDGE_INTR_THREAD_EXIT 1
+/* Sentinel value to detect an uninitialized file handle */
+#define INIT_FD -1
+
+/*
+ * Used to store port and queue ID of interrupting Rx queue
+ */
+union queue_data {
+ RTE_STD_C11
+ void *ptr;
+ struct {
+ uint16_t port;
+ uint16_t queue;
+ };
+};
/*
* There is an instance of this struct per polled Rx queue added to the
@@ -27,7 +53,7 @@
*/
struct eth_rx_poll_entry {
/* Eth port to poll */
- uint8_t eth_dev_id;
+ uint16_t eth_dev_id;
/* Eth rx queue to poll */
uint16_t eth_rx_qid;
};
@@ -71,6 +97,30 @@ struct rte_event_eth_rx_adapter {
uint16_t enq_block_count;
/* Block start ts */
uint64_t rx_enq_block_start_ts;
+ /* epoll fd used to wait for Rx interrupts */
+ int epd;
+	/* Number of interrupt driven Rx queues */
+ uint32_t num_rx_intr;
+ /* Used to send <dev id, queue id> of interrupting Rx queues from
+ * the interrupt thread to the Rx thread
+ */
+ struct rte_ring *intr_ring;
+ /* Rx Queue data (dev id, queue id) for the last non-empty
+ * queue polled
+ */
+ union queue_data qd;
+ /* queue_data is valid */
+ int qd_valid;
+ /* Interrupt ring lock, synchronizes Rx thread
+ * and interrupt thread
+ */
+ rte_spinlock_t intr_ring_lock;
+	/* Event array passed to rte_epoll_wait */
+ struct rte_epoll_event *epoll_events;
+ /* Count of interrupt vectors in use */
+ uint32_t num_intr_vec;
+ /* Thread blocked on Rx interrupts */
+ pthread_t rx_intr_thread;
/* Configuration callback for rte_service configuration */
rte_event_eth_rx_adapter_conf_cb conf_cb;
/* Configuration callback argument */
@@ -87,12 +137,20 @@ struct rte_event_eth_rx_adapter {
int socket_id;
/* Per adapter EAL service */
uint32_t service_id;
+ /* Adapter started flag */
+ uint8_t rxa_started;
+ /* Adapter ID */
+ uint8_t id;
} __rte_cache_aligned;
/* Per eth device */
struct eth_device_info {
struct rte_eth_dev *dev;
struct eth_rx_queue_info *rx_queue;
+ /* Rx callback */
+ rte_event_eth_rx_adapter_cb_fn cb_fn;
+ /* Rx callback argument */
+ void *cb_arg;
/* Set if ethdev->eventdev packet transfer uses a
* hardware mechanism
*/
@@ -103,15 +161,42 @@ struct eth_device_info {
* rx_adapter_stop callback needs to be invoked
*/
uint8_t dev_rx_started;
- /* If nb_dev_queues > 0, the start callback will
+ /* Number of queues added for this device */
+ uint16_t nb_dev_queues;
+ /* Number of poll based queues
+ * If nb_rx_poll > 0, the start callback will
* be invoked if not already invoked
*/
- uint16_t nb_dev_queues;
+ uint16_t nb_rx_poll;
+ /* Number of interrupt based queues
+ * If nb_rx_intr > 0, the start callback will
+ * be invoked if not already invoked.
+ */
+ uint16_t nb_rx_intr;
+ /* Number of queues that use the shared interrupt */
+ uint16_t nb_shared_intr;
+ /* sum(wrr(q)) for all queues within the device
+ * useful when deleting all device queues
+ */
+ uint32_t wrr_len;
+	/* Intr based queue index to start polling from; this is used
+ * if the number of shared interrupts is non-zero
+ */
+ uint16_t next_q_idx;
+ /* Intr based queue indices */
+ uint16_t *intr_queue;
+	/* Device generates a per Rx queue interrupt for queue indices
+	 * < RTE_MAX_RXTX_INTR_VEC_ID - 1
+	 */
+ int multi_intr_cap;
+ /* shared interrupt enabled */
+ int shared_intr_enabled;
};
/* Per Rx queue */
struct eth_rx_queue_info {
int queue_enabled; /* True if added */
+ int intr_enabled;
uint16_t wt; /* Polling weight */
uint8_t event_queue_id; /* Event queue to enqueue packets to */
uint8_t sched_type; /* Sched type for events */
@@ -123,30 +208,30 @@ struct eth_rx_queue_info {
static struct rte_event_eth_rx_adapter **event_eth_rx_adapter;
static inline int
-valid_id(uint8_t id)
+rxa_validate_id(uint8_t id)
{
return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE;
}
#define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \
- if (!valid_id(id)) { \
+ if (!rxa_validate_id(id)) { \
RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \
return retval; \
} \
} while (0)
static inline int
-sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_sw_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
{
- return rx_adapter->num_rx_polled;
+ return rx_adapter->num_rx_polled + rx_adapter->num_rx_intr;
}
/* Greatest common divisor */
-static uint16_t gcd_u16(uint16_t a, uint16_t b)
+static uint16_t rxa_gcd_u16(uint16_t a, uint16_t b)
{
uint16_t r = a % b;
- return r ? gcd_u16(b, r) : b;
+ return r ? rxa_gcd_u16(b, r) : b;
}
/* Returns the next queue in the polling sequence
@@ -154,7 +239,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
* http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling
*/
static int
-wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
unsigned int n, int *cw,
struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt,
uint16_t gcd, int prev)
@@ -164,7 +249,7 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
while (1) {
uint16_t q;
- uint8_t d;
+ uint16_t d;
i = (i + 1) % n;
if (i == 0) {
@@ -182,13 +267,298 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
-/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static inline int
+rxa_shared_intr(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int multi_intr_cap;
+
+ if (dev_info->dev->intr_handle == NULL)
+ return 0;
+
+ multi_intr_cap = rte_intr_cap_multiple(dev_info->dev->intr_handle);
+ return !multi_intr_cap ||
+ rx_queue_id >= RTE_MAX_RXTX_INTR_VEC_ID - 1;
+}
+
+static inline int
+rxa_intr_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return dev_info->rx_queue &&
+ !dev_info->internal_event_port &&
+ queue_info->queue_enabled && queue_info->wt == 0;
+}
+
+static inline int
+rxa_polled_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return !dev_info->internal_event_port &&
+ dev_info->rx_queue &&
+ queue_info->queue_enabled && queue_info->wt != 0;
+}
+
+/* Calculate change in number of vectors after Rx queue ID is added/deleted */
static int
-eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_nb_intr_vect(struct eth_device_info *dev_info, int rx_queue_id, int add)
{
- uint8_t d;
+ uint16_t i;
+ int n, s;
+ uint16_t nbq;
+
+ nbq = dev_info->dev->data->nb_rx_queues;
+ n = 0; /* non shared count */
+ s = 0; /* shared count */
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < nbq; i++) {
+ if (!rxa_shared_intr(dev_info, i))
+ n += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ else
+ s += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ }
+
+ if (s > 0) {
+ if ((add && dev_info->nb_shared_intr == 0) ||
+ (!add && dev_info->nb_shared_intr))
+ n += 1;
+ }
+ } else {
+ if (!rxa_shared_intr(dev_info, rx_queue_id))
+ n = add ? !rxa_intr_queue(dev_info, rx_queue_id) :
+ rxa_intr_queue(dev_info, rx_queue_id);
+ else
+ n = add ? !dev_info->nb_shared_intr :
+ dev_info->nb_shared_intr == 1;
+ }
+
+ return add ? n : -n;
+}
+
+/* Calculate nb_rx_intr after deleting interrupt mode rx queues
+ */
+static void
+rxa_calc_nb_post_intr_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_intr)
+{
+ uint32_t intr_diff;
+
+ if (rx_queue_id == -1)
+ intr_diff = dev_info->nb_rx_intr;
+ else
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+}
+
+/* Calculate nb_rx_* after adding interrupt mode Rx queues; the newly added
+ * interrupt queues could currently be poll mode Rx queues.
+ */
+static void
+rxa_calc_nb_post_add_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_intr;
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ intr_diff = !rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_intr = rx_adapter->num_rx_intr + intr_diff;
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate size of the eth_rx_poll and wrr_sched arrays
+ * after deleting poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_poll_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_wrr)
+{
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_add_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->nb_rx_intr;
+ poll_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_poll;
+ wrr_len_diff = wt*dev_info->dev->data->nb_rx_queues
+ - dev_info->wrr_len;
+ } else {
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = !rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = rxa_polled_queue(dev_info, rx_queue_id) ?
+ wt - dev_info->rx_queue[rx_queue_id].wt :
+ wt;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled + poll_diff;
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+ *nb_wrr = rx_adapter->wrr_len + wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding rx_queue_id */
+static void
+rxa_calc_nb_post_add(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ if (wt != 0)
+ rxa_calc_nb_post_add_poll(rx_adapter, dev_info, rx_queue_id,
+ wt, nb_rx_poll, nb_rx_intr, nb_wrr);
+ else
+ rxa_calc_nb_post_add_intr(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_poll, nb_rx_intr, nb_wrr);
+}
+
+/* Calculate nb_rx_* after deleting rx_queue_id */
+static void
+rxa_calc_nb_post_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ rxa_calc_nb_post_poll_del(rx_adapter, dev_info, rx_queue_id, nb_rx_poll,
+ nb_wrr);
+ rxa_calc_nb_post_intr_del(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_intr);
+}
+
+/*
+ * Allocate the rx_poll array
+ */
+static struct eth_rx_poll_entry *
+rxa_alloc_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_rx_polled)
+{
+ size_t len;
+
+ len = RTE_ALIGN(num_rx_polled * sizeof(*rx_adapter->eth_rx_poll),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+/*
+ * Allocate the WRR array
+ */
+static uint32_t *
+rxa_alloc_wrr(struct rte_event_eth_rx_adapter *rx_adapter, int nb_wrr)
+{
+ size_t len;
+
+ len = RTE_ALIGN(nb_wrr * sizeof(*rx_adapter->wrr_sched),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+static int
+rxa_alloc_poll_arrays(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t nb_poll,
+ uint32_t nb_wrr,
+ struct eth_rx_poll_entry **rx_poll,
+ uint32_t **wrr_sched)
+{
+
+ if (nb_poll == 0) {
+ *rx_poll = NULL;
+ *wrr_sched = NULL;
+ return 0;
+ }
+
+ *rx_poll = rxa_alloc_poll(rx_adapter, nb_poll);
+ if (*rx_poll == NULL) {
+ *wrr_sched = NULL;
+ return -ENOMEM;
+ }
+
+ *wrr_sched = rxa_alloc_wrr(rx_adapter, nb_wrr);
+ if (*wrr_sched == NULL) {
+ rte_free(*rx_poll);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static void
+rxa_calc_wrr_sequence(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_rx_poll_entry *rx_poll,
+ uint32_t *rx_wrr)
+{
+ uint16_t d;
uint16_t q;
unsigned int i;
+ int prev = -1;
+ int cw = -1;
/* Initialize variables for calculation of wrr schedule */
uint16_t max_wrr_pos = 0;
@@ -196,79 +566,52 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
uint16_t max_wt = 0;
uint16_t gcd = 0;
- struct eth_rx_poll_entry *rx_poll = NULL;
- uint32_t *rx_wrr = NULL;
+ if (rx_poll == NULL)
+ return;
- if (rx_adapter->num_rx_polled) {
- size_t len = RTE_ALIGN(rx_adapter->num_rx_polled *
- sizeof(*rx_adapter->eth_rx_poll),
- RTE_CACHE_LINE_SIZE);
- rx_poll = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_poll == NULL)
- return -ENOMEM;
+ /* Generate the array of all queues to poll; poll_q tracks the
+ * number of entries generated
+ */
+ RTE_ETH_FOREACH_DEV(d) {
+ uint16_t nb_rx_queues;
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[d];
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ if (dev_info->rx_queue == NULL)
+ continue;
+ if (dev_info->internal_event_port)
+ continue;
+ dev_info->wrr_len = 0;
+ for (q = 0; q < nb_rx_queues; q++) {
+ struct eth_rx_queue_info *queue_info =
+ &dev_info->rx_queue[q];
+ uint16_t wt;
- /* Generate array of all queues to poll, the size of this
- * array is poll_q
- */
- for (d = 0; d < rte_eth_dev_count(); d++) {
- uint16_t nb_rx_queues;
- struct eth_device_info *dev_info =
- &rx_adapter->eth_devices[d];
- nb_rx_queues = dev_info->dev->data->nb_rx_queues;
- if (dev_info->rx_queue == NULL)
+ if (!rxa_polled_queue(dev_info, q))
continue;
- for (q = 0; q < nb_rx_queues; q++) {
- struct eth_rx_queue_info *queue_info =
- &dev_info->rx_queue[q];
- if (queue_info->queue_enabled == 0)
- continue;
-
- uint16_t wt = queue_info->wt;
- rx_poll[poll_q].eth_dev_id = d;
- rx_poll[poll_q].eth_rx_qid = q;
- max_wrr_pos += wt;
- max_wt = RTE_MAX(max_wt, wt);
- gcd = (gcd) ? gcd_u16(gcd, wt) : wt;
- poll_q++;
- }
- }
-
- len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr),
- RTE_CACHE_LINE_SIZE);
- rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_wrr == NULL) {
- rte_free(rx_poll);
- return -ENOMEM;
- }
-
- /* Generate polling sequence based on weights */
- int prev = -1;
- int cw = -1;
- for (i = 0; i < max_wrr_pos; i++) {
- rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw,
- rx_poll, max_wt, gcd, prev);
- prev = rx_wrr[i];
+ wt = queue_info->wt;
+ rx_poll[poll_q].eth_dev_id = d;
+ rx_poll[poll_q].eth_rx_qid = q;
+ max_wrr_pos += wt;
+ dev_info->wrr_len += wt;
+ max_wt = RTE_MAX(max_wt, wt);
+ gcd = (gcd) ? rxa_gcd_u16(gcd, wt) : wt;
+ poll_q++;
}
}
- rte_free(rx_adapter->eth_rx_poll);
- rte_free(rx_adapter->wrr_sched);
-
- rx_adapter->eth_rx_poll = rx_poll;
- rx_adapter->wrr_sched = rx_wrr;
- rx_adapter->wrr_len = max_wrr_pos;
-
- return 0;
+ /* Generate polling sequence based on weights */
+ prev = -1;
+ cw = -1;
+ for (i = 0; i < max_wrr_pos; i++) {
+ rx_wrr[i] = rxa_wrr_next(rx_adapter, poll_q, &cw,
+ rx_poll, max_wt, gcd, prev);
+ prev = rx_wrr[i];
+ }
}
static inline void
-mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
+rxa_mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
struct ipv6_hdr **ipv6_hdr)
{
struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -307,7 +650,7 @@ mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
/* Calculate RSS hash for IPv4/6 */
static inline uint32_t
-do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
+rxa_do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
{
uint32_t input_len;
void *tuple;
@@ -316,7 +659,7 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
- mtoip(m, &ipv4_hdr, &ipv6_hdr);
+ rxa_mtoip(m, &ipv4_hdr, &ipv6_hdr);
if (ipv4_hdr) {
ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr);
@@ -335,13 +678,13 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
}
static inline int
-rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
{
return !!rx_adapter->enq_block_count;
}
static inline void
-rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
{
if (rx_adapter->rx_enq_block_start_ts)
return;
@@ -354,13 +697,13 @@ rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
struct rte_event_eth_rx_adapter_stats *stats)
{
if (unlikely(!stats->rx_enq_start_ts))
stats->rx_enq_start_ts = rte_get_tsc_cycles();
- if (likely(!rx_enq_blocked(rx_adapter)))
+ if (likely(!rxa_enq_blocked(rx_adapter)))
return;
rx_adapter->enq_block_count = 0;
@@ -376,8 +719,8 @@ rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
* this function
*/
static inline void
-buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
- struct rte_event *ev)
+rxa_buffer_event(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct rte_event *ev)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -386,7 +729,7 @@ buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
/* Enqueue buffered events to event device */
static inline uint16_t
-flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -403,8 +746,8 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
stats->rx_enq_retry++;
}
- n ? rx_enq_block_end_ts(rx_adapter, stats) :
- rx_enq_block_start_ts(rx_adapter);
+ n ? rxa_enq_block_end_ts(rx_adapter, stats) :
+ rxa_enq_block_start_ts(rx_adapter);
buf->count -= n;
stats->rx_enq_count += n;
@@ -413,18 +756,19 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
- uint8_t dev_id,
- uint16_t rx_queue_id,
- struct rte_mbuf **mbufs,
- uint16_t num)
+rxa_buffer_mbufs(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t eth_dev_id,
+ uint16_t rx_queue_id,
+ struct rte_mbuf **mbufs,
+ uint16_t num)
{
uint32_t i;
- struct eth_device_info *eth_device_info =
- &rx_adapter->eth_devices[dev_id];
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[eth_dev_id];
struct eth_rx_queue_info *eth_rx_queue_info =
- &eth_device_info->rx_queue[rx_queue_id];
-
+ &dev_info->rx_queue[rx_queue_id];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
int32_t qid = eth_rx_queue_info->event_queue_id;
uint8_t sched_type = eth_rx_queue_info->sched_type;
uint8_t priority = eth_rx_queue_info->priority;
@@ -434,22 +778,48 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
uint32_t rss_mask;
uint32_t rss;
int do_rss;
+ uint64_t ts;
+ struct rte_mbuf *cb_mbufs[BATCH_SIZE];
+ uint16_t nb_cb;
/* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */
rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1);
do_rss = !rss_mask && !eth_rx_queue_info->flow_id_mask;
+ if ((m->ol_flags & PKT_RX_TIMESTAMP) == 0) {
+ ts = rte_get_tsc_cycles();
+ for (i = 0; i < num; i++) {
+ m = mbufs[i];
+
+ m->timestamp = ts;
+ m->ol_flags |= PKT_RX_TIMESTAMP;
+ }
+ }
+
+ nb_cb = dev_info->cb_fn ? dev_info->cb_fn(eth_dev_id, rx_queue_id,
+ ETH_EVENT_BUFFER_SIZE,
+ buf->count, mbufs,
+ num,
+ dev_info->cb_arg,
+ cb_mbufs) :
+ num;
+ if (nb_cb < num) {
+ mbufs = cb_mbufs;
+ num = nb_cb;
+ }
+
for (i = 0; i < num; i++) {
m = mbufs[i];
struct rte_event *ev = &events[i];
rss = do_rss ?
- do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss;
+ rxa_do_softrss(m, rx_adapter->rss_key_be) :
+ m->hash.rss;
flow_id =
eth_rx_queue_info->flow_id &
eth_rx_queue_info->flow_id_mask;
flow_id |= rss & ~eth_rx_queue_info->flow_id_mask;
-
ev->flow_id = flow_id;
ev->op = RTE_EVENT_OP_NEW;
ev->sched_type = sched_type;
@@ -459,8 +829,275 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
ev->priority = priority;
ev->mbuf = m;
- buf_event_enqueue(rx_adapter, ev);
+ rxa_buffer_event(rx_adapter, ev);
+ }
+}
+
+/* Enqueue packets from <port, q> to event buffer */
+static inline uint32_t
+rxa_eth_rx(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t port_id,
+ uint16_t queue_id,
+ uint32_t rx_count,
+ uint32_t max_rx,
+ int *rxq_empty)
+{
+ struct rte_mbuf *mbufs[BATCH_SIZE];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
+ struct rte_event_eth_rx_adapter_stats *stats =
+ &rx_adapter->stats;
+ uint16_t n;
+ uint32_t nb_rx = 0;
+
+ if (rxq_empty)
+ *rxq_empty = 0;
+ /* Don't do a batch dequeue from the rx queue if there isn't
+ * enough space in the enqueue buffer.
+ */
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ stats->rx_poll_count++;
+ n = rte_eth_rx_burst(port_id, queue_id, mbufs, BATCH_SIZE);
+ if (unlikely(!n)) {
+ if (rxq_empty)
+ *rxq_empty = 1;
+ break;
+ }
+ rxa_buffer_mbufs(rx_adapter, port_id, queue_id, mbufs, n);
+ nb_rx += n;
+ if (rx_count + nb_rx > max_rx)
+ break;
+ }
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ return nb_rx;
+}
+
+static inline void
+rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
+ void *data)
+{
+ uint16_t port_id;
+ uint16_t queue;
+ int err;
+ union queue_data qd;
+ struct eth_device_info *dev_info;
+ struct eth_rx_queue_info *queue_info;
+ int *intr_enabled;
+
+ qd.ptr = data;
+ port_id = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port_id];
+ queue_info = &dev_info->rx_queue[queue];
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+ if (rxa_shared_intr(dev_info, queue))
+ intr_enabled = &dev_info->shared_intr_enabled;
+ else
+ intr_enabled = &queue_info->intr_enabled;
+
+ if (*intr_enabled) {
+ *intr_enabled = 0;
+ err = rte_ring_enqueue(rx_adapter->intr_ring, data);
+ /* Entry should always be available.
+ * The ring size equals the maximum number of interrupt
+ * vectors supported (an interrupt vector is shared in
+ * case of shared interrupts)
+ */
+ if (err)
+ RTE_EDEV_LOG_ERR("Failed to enqueue interrupt"
+ " to ring: %s", strerror(err));
+ else
+ rte_eth_dev_rx_intr_disable(port_id, queue);
+ }
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+static int
+rxa_intr_ring_check_avail(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_intr_vec)
+{
+ if (rx_adapter->num_intr_vec + num_intr_vec >
+ RTE_EVENT_ETH_INTR_RING_SIZE) {
+ RTE_EDEV_LOG_ERR("Exceeded intr ring slots current"
+ " %d needed %d limit %d", rx_adapter->num_intr_vec,
+ num_intr_vec, RTE_EVENT_ETH_INTR_RING_SIZE);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+/* Delete entries for (dev, queue) from the interrupt ring */
+static void
+rxa_intr_ring_del_entries(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int i, n;
+ union queue_data qd;
+
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+
+ n = rte_ring_count(rx_adapter->intr_ring);
+ for (i = 0; i < n; i++) {
+ rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (!rxa_shared_intr(dev_info, rx_queue_id)) {
+ if (qd.port == dev_info->dev->data->port_id &&
+ qd.queue == rx_queue_id)
+ continue;
+ } else {
+ if (qd.port == dev_info->dev->data->port_id)
+ continue;
+ }
+ rte_ring_enqueue(rx_adapter->intr_ring, qd.ptr);
+ }
+
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+/* Entry function for the pthread that services interrupt mode receive queues.
+ * After receiving an Rx interrupt, it enqueues the port id and queue id of the
+ * interrupting queue to the adapter's ring buffer for interrupt events.
+ * These events are picked up by rxa_intr_ring_dequeue() which is invoked from
+ * the adapter service function.
+ */
+static void *
+rxa_intr_thread(void *arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter = arg;
+ struct rte_epoll_event *epoll_events = rx_adapter->epoll_events;
+ int n, i;
+
+ while (1) {
+ n = rte_epoll_wait(rx_adapter->epd, epoll_events,
+ RTE_EVENT_ETH_INTR_RING_SIZE, -1);
+ if (unlikely(n < 0))
+ RTE_EDEV_LOG_ERR("rte_epoll_wait returned error %d",
+ n);
+ for (i = 0; i < n; i++) {
+ rxa_intr_ring_enqueue(rx_adapter,
+ epoll_events[i].epdata.data);
+ }
+ }
+
+ return NULL;
+}
+
+/* Dequeue <port, q> from interrupt ring and enqueue received
+ * mbufs to eventdev
+ */
+static inline uint32_t
+rxa_intr_ring_dequeue(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ uint32_t n;
+ uint32_t nb_rx = 0;
+ int rxq_empty;
+ struct rte_eth_event_enqueue_buffer *buf;
+ rte_spinlock_t *ring_lock;
+ uint8_t max_done = 0;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ if (rte_ring_count(rx_adapter->intr_ring) == 0
+ && !rx_adapter->qd_valid)
+ return 0;
+
+ buf = &rx_adapter->event_enqueue_buffer;
+ ring_lock = &rx_adapter->intr_ring_lock;
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ struct eth_device_info *dev_info;
+ uint16_t port;
+ uint16_t queue;
+ union queue_data qd = rx_adapter->qd;
+ int err;
+
+ if (!rx_adapter->qd_valid) {
+ struct eth_rx_queue_info *queue_info;
+
+ rte_spinlock_lock(ring_lock);
+ err = rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (err) {
+ rte_spinlock_unlock(ring_lock);
+ break;
+ }
+
+ port = qd.port;
+ queue = qd.queue;
+ rx_adapter->qd = qd;
+ rx_adapter->qd_valid = 1;
+ dev_info = &rx_adapter->eth_devices[port];
+ if (rxa_shared_intr(dev_info, queue))
+ dev_info->shared_intr_enabled = 1;
+ else {
+ queue_info = &dev_info->rx_queue[queue];
+ queue_info->intr_enabled = 1;
+ }
+ rte_eth_dev_rx_intr_enable(port, queue);
+ rte_spinlock_unlock(ring_lock);
+ } else {
+ port = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port];
+ }
+
+ if (rxa_shared_intr(dev_info, queue)) {
+ uint16_t i;
+ uint16_t nb_queues;
+
+ nb_queues = dev_info->dev->data->nb_rx_queues;
+ n = 0;
+ for (i = dev_info->next_q_idx; i < nb_queues; i++) {
+ uint8_t enq_buffer_full;
+
+ if (!rxa_intr_queue(dev_info, i))
+ continue;
+ n = rxa_eth_rx(rx_adapter, port, i, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ nb_rx += n;
+
+ enq_buffer_full = !rxq_empty && n == 0;
+ max_done = nb_rx > rx_adapter->max_nb_rx;
+
+ if (enq_buffer_full || max_done) {
+ dev_info->next_q_idx = i;
+ goto done;
+ }
+ }
+
+ rx_adapter->qd_valid = 0;
+
+ /* Reinitialize for next interrupt */
+ dev_info->next_q_idx = dev_info->multi_intr_cap ?
+ RTE_MAX_RXTX_INTR_VEC_ID - 1 :
+ 0;
+ } else {
+ n = rxa_eth_rx(rx_adapter, port, queue, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ rx_adapter->qd_valid = !rxq_empty;
+ nb_rx += n;
+ if (nb_rx > rx_adapter->max_nb_rx)
+ break;
+ }
}
+
+done:
+ rx_adapter->stats.rx_intr_packets += nb_rx;
+ return nb_rx;
}
/*
@@ -477,12 +1114,10 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
* it.
*/
static inline uint32_t
-eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter)
{
uint32_t num_queue;
- uint16_t n;
uint32_t nb_rx = 0;
- struct rte_mbuf *mbufs[BATCH_SIZE];
struct rte_eth_event_enqueue_buffer *buf;
uint32_t wrr_pos;
uint32_t max_nb_rx;
@@ -490,57 +1125,54 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
wrr_pos = rx_adapter->wrr_pos;
max_nb_rx = rx_adapter->max_nb_rx;
buf = &rx_adapter->event_enqueue_buffer;
- struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;
+ stats = &rx_adapter->stats;
/* Iterate through a WRR sequence */
for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
unsigned int poll_idx = rx_adapter->wrr_sched[wrr_pos];
uint16_t qid = rx_adapter->eth_rx_poll[poll_idx].eth_rx_qid;
- uint8_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id;
+ uint16_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id;
/* Don't do a batch dequeue from the rx queue if there isn't
* enough space in the enqueue buffer.
*/
if (buf->count >= BATCH_SIZE)
- flush_event_buffer(rx_adapter);
- if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
- break;
-
- stats->rx_poll_count++;
- n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
+ rxa_flush_event_buffer(rx_adapter);
+ if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) {
+ rx_adapter->wrr_pos = wrr_pos;
+ return nb_rx;
+ }
- if (n) {
- stats->rx_packets += n;
- /* The check before rte_eth_rx_burst() ensures that
- * all n mbufs can be buffered
- */
- fill_event_buffer(rx_adapter, d, qid, mbufs, n);
- nb_rx += n;
- if (nb_rx > max_nb_rx) {
- rx_adapter->wrr_pos =
+ nb_rx += rxa_eth_rx(rx_adapter, d, qid, nb_rx, max_nb_rx,
+ NULL);
+ if (nb_rx > max_nb_rx) {
+ rx_adapter->wrr_pos =
(wrr_pos + 1) % rx_adapter->wrr_len;
- return nb_rx;
- }
+ break;
}
if (++wrr_pos == rx_adapter->wrr_len)
wrr_pos = 0;
}
-
return nb_rx;
}
static int
-event_eth_rx_adapter_service_func(void *args)
+rxa_service_func(void *args)
{
struct rte_event_eth_rx_adapter *rx_adapter = args;
- struct rte_eth_event_enqueue_buffer *buf;
+ struct rte_event_eth_rx_adapter_stats *stats;
- buf = &rx_adapter->event_enqueue_buffer;
if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
return 0;
- if (eth_rx_poll(rx_adapter) == 0 && buf->count)
- flush_event_buffer(rx_adapter);
+ if (!rx_adapter->rxa_started) {
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ return 0;
+ }
+
+ stats = &rx_adapter->stats;
+ stats->rx_packets += rxa_intr_ring_dequeue(rx_adapter);
+ stats->rx_packets += rxa_poll(rx_adapter);
rte_spinlock_unlock(&rx_adapter->rx_lock);
return 0;
}
@@ -572,14 +1204,14 @@ rte_event_eth_rx_adapter_init(void)
}
static inline struct rte_event_eth_rx_adapter *
-id_to_rx_adapter(uint8_t id)
+rxa_id_to_adapter(uint8_t id)
{
return event_eth_rx_adapter ?
event_eth_rx_adapter[id] : NULL;
}
static int
-default_conf_cb(uint8_t id, uint8_t dev_id,
+rxa_default_conf_cb(uint8_t id, uint8_t dev_id,
struct rte_event_eth_rx_adapter_conf *conf, void *arg)
{
int ret;
@@ -588,7 +1220,7 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
int started;
uint8_t port_id;
struct rte_event_port_conf *port_conf = arg;
- struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id);
+ struct rte_event_eth_rx_adapter *rx_adapter = rxa_id_to_adapter(id);
dev = &rte_eventdevs[rx_adapter->eventdev_id];
dev_conf = dev->data->dev_conf;
@@ -625,7 +1257,351 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
}
static int
-init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
+rxa_epoll_create1(void)
+{
+#if defined(LINUX)
+ int fd;
+ fd = epoll_create1(EPOLL_CLOEXEC);
+ return fd < 0 ? -errno : fd;
+#elif defined(BSD)
+ return -ENOTSUP;
+#endif
+}
+
+static int
+rxa_init_epd(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ if (rx_adapter->epd != INIT_FD)
+ return 0;
+
+ rx_adapter->epd = rxa_epoll_create1();
+ if (rx_adapter->epd < 0) {
+ int err = rx_adapter->epd;
+ rx_adapter->epd = INIT_FD;
+ RTE_EDEV_LOG_ERR("epoll_create1() failed, err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+rxa_create_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (rx_adapter->intr_ring)
+ return 0;
+
+ rx_adapter->intr_ring = rte_ring_create("intr_ring",
+ RTE_EVENT_ETH_INTR_RING_SIZE,
+ rte_socket_id(), 0);
+ if (!rx_adapter->intr_ring)
+ return -ENOMEM;
+
+ rx_adapter->epoll_events = rte_zmalloc_socket(rx_adapter->mem_name,
+ RTE_EVENT_ETH_INTR_RING_SIZE *
+ sizeof(struct rte_epoll_event),
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+ if (!rx_adapter->epoll_events) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ rte_spinlock_init(&rx_adapter->intr_ring_lock);
+
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "rx-intr-thread-%d", rx_adapter->id);
+
+ err = rte_ctrl_thread_create(&rx_adapter->rx_intr_thread, thread_name,
+ NULL, rxa_intr_thread, rx_adapter);
+ if (!err) {
+ rte_thread_setname(rx_adapter->rx_intr_thread, thread_name);
+ return 0;
+ }
+
+ RTE_EDEV_LOG_ERR("Failed to create interrupt thread err = %d\n", err);
+error:
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return err;
+}
+
+static int
+rxa_destroy_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+
+ err = pthread_cancel(rx_adapter->rx_intr_thread);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't cancel interrupt thread err = %d\n",
+ err);
+
+ err = pthread_join(rx_adapter->rx_intr_thread, NULL);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't join interrupt thread err = %d\n", err);
+
+ rte_free(rx_adapter->epoll_events);
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return 0;
+}
+
+static int
+rxa_free_intr_resources(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int ret;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ ret = rxa_destroy_intr_thread(rx_adapter);
+ if (ret)
+ return ret;
+
+ close(rx_adapter->epd);
+ rx_adapter->epd = INIT_FD;
+
+ return ret;
+}
+
+static int
+rxa_disable_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for Rx queue %u",
+ rx_queue_id);
+ return err;
+ }
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err)
+ RTE_EDEV_LOG_ERR("Interrupt event deletion failed %d", err);
+
+ if (sintr)
+ dev_info->shared_intr_enabled = 0;
+ else
+ dev_info->rx_queue[rx_queue_id].intr_enabled = 0;
+ return err;
+}
+
+static int
+rxa_del_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int err;
+ int i;
+ int s;
+
+ if (dev_info->nb_rx_intr == 0)
+ return 0;
+
+ err = 0;
+ if (rx_queue_id == -1) {
+ s = dev_info->nb_shared_intr;
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ int sintr;
+ uint16_t q;
+
+ q = dev_info->intr_queue[i];
+ sintr = rxa_shared_intr(dev_info, q);
+ s -= sintr;
+
+ if (!sintr || s == 0) {
+
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ q);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ q);
+ }
+ }
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+ if (!rxa_shared_intr(dev_info, rx_queue_id) ||
+ dev_info->nb_shared_intr == 1) {
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ rx_queue_id);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ rx_queue_id);
+ }
+
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ if (dev_info->intr_queue[i] == rx_queue_id) {
+ for (; i < dev_info->nb_rx_intr - 1; i++)
+ dev_info->intr_queue[i] =
+ dev_info->intr_queue[i + 1];
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+static int
+rxa_config_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err, err1;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ union queue_data qd;
+ int init_fd;
+ uint16_t *intr_queue;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ if (rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+
+ intr_queue = dev_info->intr_queue;
+ if (dev_info->intr_queue == NULL) {
+ size_t len =
+ dev_info->dev->data->nb_rx_queues * sizeof(uint16_t);
+ dev_info->intr_queue =
+ rte_zmalloc_socket(
+ rx_adapter->mem_name,
+ len,
+ 0,
+ rx_adapter->socket_id);
+ if (dev_info->intr_queue == NULL)
+ return -ENOMEM;
+ }
+
+ init_fd = rx_adapter->epd;
+ err = rxa_init_epd(rx_adapter);
+ if (err)
+ goto err_free_queue;
+
+ qd.port = eth_dev_id;
+ qd.queue = rx_queue_id;
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_ADD,
+ qd.ptr);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Failed to add interrupt event for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+ goto err_del_fd;
+ }
+
+ err = rte_eth_dev_rx_intr_enable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not enable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+
+ goto err_del_event;
+ }
+
+ err = rxa_create_intr_thread(rx_adapter);
+ if (!err) {
+ if (sintr)
+ dev_info->shared_intr_enabled = 1;
+ else
+ dev_info->rx_queue[rx_queue_id].intr_enabled = 1;
+ return 0;
+ }
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err)
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+err_del_event:
+ err1 = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err1) {
+ RTE_EDEV_LOG_ERR("Could not delete event for"
+ " Rx Queue %u err %d", rx_queue_id, err1);
+ }
+err_del_fd:
+ if (init_fd == INIT_FD) {
+ close(rx_adapter->epd);
+ rx_adapter->epd = -1;
+ }
+err_free_queue:
+ if (intr_queue == NULL)
+ rte_free(dev_info->intr_queue);
+
+ return err;
+}
+
+static int
+rxa_add_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+
+{
+ int i, j, err;
+ int si = -1;
+ int shared_done = (dev_info->nb_shared_intr > 0);
+
+ if (rx_queue_id != -1) {
+ if (rxa_shared_intr(dev_info, rx_queue_id) && shared_done)
+ return 0;
+ return rxa_config_intr(rx_adapter, dev_info, rx_queue_id);
+ }
+
+ err = 0;
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) {
+
+ if (rxa_shared_intr(dev_info, i) && shared_done)
+ continue;
+
+ err = rxa_config_intr(rx_adapter, dev_info, i);
+
+ shared_done = err == 0 && rxa_shared_intr(dev_info, i);
+ if (shared_done) {
+ si = i;
+ dev_info->shared_intr_enabled = 1;
+ }
+ if (err)
+ break;
+ }
+
+ if (err == 0)
+ return 0;
+
+ shared_done = (dev_info->nb_shared_intr > 0);
+ for (j = 0; j < i; j++) {
+ if (rxa_intr_queue(dev_info, j))
+ continue;
+ if (rxa_shared_intr(dev_info, j) && si != j)
+ continue;
+ err = rxa_disable_intr(rx_adapter, dev_info, j);
+ if (err)
+ break;
+
+ }
+
+ return err;
+}
+
+static int
+rxa_init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
{
int ret;
struct rte_service_spec service;
@@ -638,7 +1614,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN,
"rte_event_eth_rx_adapter_%d", id);
service.socket_id = rx_adapter->socket_id;
- service.callback = event_eth_rx_adapter_service_func;
+ service.callback = rxa_service_func;
service.callback_userdata = rx_adapter;
/* Service function handles locking for queue add/del updates */
service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
@@ -659,6 +1635,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
rx_adapter->event_port_id = rx_adapter_conf.event_port_id;
rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx;
rx_adapter->service_inited = 1;
+ rx_adapter->epd = INIT_FD;
return 0;
err_done:
@@ -666,9 +1643,8 @@ err_done:
return ret;
}
-
static void
-update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_update_queue(struct rte_event_eth_rx_adapter *rx_adapter,
struct eth_device_info *dev_info,
int32_t rx_queue_id,
uint8_t add)
@@ -682,7 +1658,7 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
if (rx_queue_id == -1) {
for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- update_queue_info(rx_adapter, dev_info, i, add);
+ rxa_update_queue(rx_adapter, dev_info, i, add);
} else {
queue_info = &dev_info->rx_queue[rx_queue_id];
enabled = queue_info->queue_enabled;
@@ -697,31 +1673,65 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
-static int
-event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id)
+static void
+rxa_sw_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id)
{
- struct eth_rx_queue_info *queue_info;
+ int pollq;
+ int intrq;
+ int sintrq;
+
if (rx_adapter->nb_queues == 0)
- return 0;
+ return;
- queue_info = &dev_info->rx_queue[rx_queue_id];
- rx_adapter->num_rx_polled -= queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 0);
- return 0;
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_sw_del(rx_adapter, dev_info, i);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 0);
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
}
static void
-event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id,
- const struct rte_event_eth_rx_adapter_queue_conf *conf)
-
+rxa_add_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *conf)
{
struct eth_rx_queue_info *queue_info;
const struct rte_event *ev = &conf->ev;
+ int pollq;
+ int intrq;
+ int sintrq;
+
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_add_queue(rx_adapter, dev_info, i, conf);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
queue_info = &dev_info->rx_queue[rx_queue_id];
queue_info->event_queue_id = ev->queue_id;
@@ -735,69 +1745,162 @@ event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
queue_info->flow_id_mask = ~0;
}
- /* The same queue can be added more than once */
- rx_adapter->num_rx_polled += !queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 1);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 1);
+ if (rxa_polled_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled += !pollq;
+ dev_info->nb_rx_poll += !pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
+ }
+
+ if (rxa_intr_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr += !intrq;
+ dev_info->nb_rx_intr += !intrq;
+ dev_info->nb_shared_intr += !intrq && sintrq;
+ if (dev_info->nb_shared_intr == 1) {
+ if (dev_info->multi_intr_cap)
+ dev_info->next_q_idx =
+ RTE_MAX_RXTX_INTR_VEC_ID - 1;
+ else
+ dev_info->next_q_idx = 0;
+ }
+ }
}
-static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter,
- uint8_t eth_dev_id,
+static int rxa_sw_add(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t eth_dev_id,
int rx_queue_id,
const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
{
struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id];
struct rte_event_eth_rx_adapter_queue_conf temp_conf;
- uint32_t i;
int ret;
+ struct eth_rx_poll_entry *rx_poll;
+ struct eth_rx_queue_info *rx_queue;
+ uint32_t *rx_wrr;
+ uint16_t nb_rx_queues;
+ uint32_t nb_rx_poll, nb_wrr;
+ uint32_t nb_rx_intr;
+ int num_intr_vec;
+ uint16_t wt;
if (queue_conf->servicing_weight == 0) {
-
struct rte_eth_dev_data *data = dev_info->dev->data;
- if (data->dev_conf.intr_conf.rxq) {
- RTE_EDEV_LOG_ERR("Interrupt driven queues"
- " not supported");
- return -ENOTSUP;
- }
- temp_conf = *queue_conf;
- /* If Rx interrupts are disabled set wt = 1 */
- temp_conf.servicing_weight = 1;
+ temp_conf = *queue_conf;
+ if (!data->dev_conf.intr_conf.rxq) {
+ /* If Rx interrupts are disabled set wt = 1 */
+ temp_conf.servicing_weight = 1;
+ }
queue_conf = &temp_conf;
}
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ rx_queue = dev_info->rx_queue;
+ wt = queue_conf->servicing_weight;
+
if (dev_info->rx_queue == NULL) {
dev_info->rx_queue =
rte_zmalloc_socket(rx_adapter->mem_name,
- dev_info->dev->data->nb_rx_queues *
+ nb_rx_queues *
sizeof(struct eth_rx_queue_info), 0,
rx_adapter->socket_id);
if (dev_info->rx_queue == NULL)
return -ENOMEM;
}
+ rx_wrr = NULL;
+ rx_poll = NULL;
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_add(rx_adapter,
- dev_info, i,
- queue_conf);
+ rxa_calc_nb_post_add(rx_adapter, dev_info, rx_queue_id,
+ queue_conf->servicing_weight,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ if (dev_info->dev->intr_handle)
+ dev_info->multi_intr_cap =
+ rte_intr_cap_multiple(dev_info->dev->intr_handle);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ goto err_free_rxqueue;
+
+ if (wt == 0) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info, rx_queue_id, 1);
+
+ ret = rxa_intr_ring_check_avail(rx_adapter, num_intr_vec);
+ if (ret)
+ goto err_free_rxqueue;
+
+ ret = rxa_add_intr_queue(rx_adapter, dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
} else {
- event_eth_rx_adapter_queue_add(rx_adapter, dev_info,
- (uint16_t)rx_queue_id,
- queue_conf);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ /* interrupt based queues are being converted to
+ * poll mode queues, delete the interrupt configuration
+ * for those.
+ */
+ ret = rxa_del_intr_queue(rx_adapter,
+ dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
+ }
}
- ret = eth_poll_wrr_calc(rx_adapter);
- if (ret) {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info, rx_queue_id);
- return ret;
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto err_free_rxqueue;
}
- return ret;
+ if (wt == 0) {
+ uint16_t i;
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ dev_info->intr_queue[i] = i;
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ dev_info->intr_queue[nb_rx_intr - 1] =
+ rx_queue_id;
+ }
+ }
+
+ rxa_add_queue(rx_adapter, dev_info, rx_queue_id, queue_conf);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
+ return 0;
+
+err_free_rxqueue:
+ if (rx_queue == NULL) {
+ rte_free(dev_info->rx_queue);
+ dev_info->rx_queue = NULL;
+ }
+
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+
+ return ret;
}
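
For context on the servicing_weight == 0 path above: interrupt mode Rx queues are only
honoured when the ethdev itself was configured with Rx queue interrupts
(dev_conf.intr_conf.rxq). A minimal sketch of that prerequisite, assuming eth_port_id,
nb_rxq and nb_txq are application-chosen placeholders and <rte_ethdev.h> is included:

struct rte_eth_conf port_conf = {
	.intr_conf = {
		.rxq = 1,	/* enable Rx queue interrupts */
	},
};

/* configure the port before adding its queues with servicing_weight == 0 */
int ret = rte_eth_dev_configure(eth_port_id, nb_rxq, nb_txq, &port_conf);
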
static int
-rx_adapter_ctrl(uint8_t id, int start)
+rxa_ctrl(uint8_t id, int start)
{
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
@@ -807,13 +1910,13 @@ rx_adapter_ctrl(uint8_t id, int start)
int stop = !start;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
dev = &rte_eventdevs[rx_adapter->eventdev_id];
- for (i = 0; i < rte_eth_dev_count(); i++) {
+ RTE_ETH_FOREACH_DEV(i) {
dev_info = &rx_adapter->eth_devices[i];
/* if start check for num dev queues */
if (start && !dev_info->nb_dev_queues)
@@ -831,8 +1934,12 @@ rx_adapter_ctrl(uint8_t id, int start)
&rte_eth_devices[i]);
}
- if (use_service)
+ if (use_service) {
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ rx_adapter->rxa_started = start;
rte_service_runstate_set(rx_adapter->service_id, start);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ }
return 0;
}
@@ -845,7 +1952,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
struct rte_event_eth_rx_adapter *rx_adapter;
int ret;
int socket_id;
- uint8_t i;
+ uint16_t i;
char mem_name[ETH_RX_ADAPTER_SERVICE_NAME_LEN];
const uint8_t default_rss_key[] = {
0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
@@ -866,7 +1973,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
return ret;
}
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter != NULL) {
RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id);
return -EEXIST;
@@ -888,9 +1995,11 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
rx_adapter->socket_id = socket_id;
rx_adapter->conf_cb = conf_cb;
rx_adapter->conf_arg = conf_arg;
+ rx_adapter->id = id;
strcpy(rx_adapter->mem_name, mem_name);
rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name,
- rte_eth_dev_count() *
+ /* FIXME: incompatible with hotplug */
+ rte_eth_dev_count_total() *
sizeof(struct eth_device_info), 0,
socket_id);
rte_convert_rss_key((const uint32_t *)default_rss_key,
@@ -903,11 +2012,11 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
return -ENOMEM;
}
rte_spinlock_init(&rx_adapter->rx_lock);
- for (i = 0; i < rte_eth_dev_count(); i++)
+ RTE_ETH_FOREACH_DEV(i)
rx_adapter->eth_devices[i].dev = &rte_eth_devices[i];
event_eth_rx_adapter[id] = rx_adapter;
- if (conf_cb == default_conf_cb)
+ if (conf_cb == rxa_default_conf_cb)
rx_adapter->default_cb_arg = 1;
return 0;
}
@@ -928,7 +2037,7 @@ rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id,
return -ENOMEM;
*pc = *port_config;
ret = rte_event_eth_rx_adapter_create_ext(id, dev_id,
- default_conf_cb,
+ rxa_default_conf_cb,
pc);
if (ret)
rte_free(pc);
@@ -942,7 +2051,7 @@ rte_event_eth_rx_adapter_free(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -963,7 +2072,7 @@ rte_event_eth_rx_adapter_free(uint8_t id)
int
rte_event_eth_rx_adapter_queue_add(uint8_t id,
- uint8_t eth_dev_id,
+ uint16_t eth_dev_id,
int32_t rx_queue_id,
const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
{
@@ -972,12 +2081,11 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
struct eth_device_info *dev_info;
- int start_service;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if ((rx_adapter == NULL) || (queue_conf == NULL))
return -EINVAL;
@@ -987,7 +2095,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&cap);
if (ret) {
RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8
- "eth port %" PRIu8, id, eth_dev_id);
+ "eth port %" PRIu16, id, eth_dev_id);
return ret;
}
@@ -995,7 +2103,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&& (queue_conf->rx_queue_flags &
RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)) {
RTE_EDEV_LOG_ERR("Flow ID override is not supported,"
- " eth port: %" PRIu8 " adapter id: %" PRIu8,
+ " eth port: %" PRIu16 " adapter id: %" PRIu8,
eth_dev_id, id);
return -EINVAL;
}
@@ -1003,7 +2111,8 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) == 0 &&
(rx_queue_id != -1)) {
RTE_EDEV_LOG_ERR("Rx queues can only be connected to single "
- "event queue id %u eth port %u", id, eth_dev_id);
+ "event queue, eth port: %" PRIu16 " adapter id: %"
+ PRIu8, eth_dev_id, id);
return -EINVAL;
}
@@ -1014,7 +2123,6 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
return -EINVAL;
}
- start_service = 0;
dev_info = &rx_adapter->eth_devices[eth_dev_id];
if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
@@ -1034,33 +2142,34 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&rte_eth_devices[eth_dev_id],
rx_queue_id, queue_conf);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ dev_info->internal_event_port = 1;
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
1);
}
} else {
rte_spinlock_lock(&rx_adapter->rx_lock);
- ret = init_service(rx_adapter, id);
- if (ret == 0)
- ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id,
+ dev_info->internal_event_port = 0;
+ ret = rxa_init_service(rx_adapter, id);
+ if (ret == 0) {
+ uint32_t service_id = rx_adapter->service_id;
+ ret = rxa_sw_add(rx_adapter, eth_dev_id, rx_queue_id,
queue_conf);
+ rte_service_component_runstate_set(service_id,
+ rxa_sw_adapter_queue_count(rx_adapter));
+ }
rte_spinlock_unlock(&rx_adapter->rx_lock);
- if (ret == 0)
- start_service = !!sw_rx_adapter_queue_count(rx_adapter);
}
if (ret)
return ret;
- if (start_service)
- rte_service_component_runstate_set(rx_adapter->service_id, 1);
-
return 0;
}
int
-rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
+rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
int32_t rx_queue_id)
{
int ret = 0;
@@ -1068,12 +2177,17 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct eth_device_info *dev_info;
uint32_t cap;
- uint16_t i;
+ uint32_t nb_rx_poll = 0;
+ uint32_t nb_wrr = 0;
+ uint32_t nb_rx_intr;
+ struct eth_rx_poll_entry *rx_poll = NULL;
+ uint32_t *rx_wrr = NULL;
+ int num_intr_vec;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -1100,7 +2214,7 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
&rte_eth_devices[eth_dev_id],
rx_queue_id);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
0);
@@ -1110,48 +2224,78 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
}
}
} else {
- int rc;
+ rxa_calc_nb_post_del(rx_adapter, dev_info, rx_queue_id,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ return ret;
+
rte_spinlock_lock(&rx_adapter->rx_lock);
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- i);
- } else {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- (uint16_t)rx_queue_id);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ ret = rxa_del_intr_queue(rx_adapter, dev_info,
+ rx_queue_id);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ rxa_sw_del(rx_adapter, dev_info, rx_queue_id);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ if (nb_rx_intr == 0) {
+ rte_free(dev_info->intr_queue);
+ dev_info->intr_queue = NULL;
}
- rc = eth_poll_wrr_calc(rx_adapter);
- if (rc)
- RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32,
- rc);
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
if (dev_info->nb_dev_queues == 0) {
rte_free(dev_info->rx_queue);
dev_info->rx_queue = NULL;
}
-
+unlock_ret:
rte_spinlock_unlock(&rx_adapter->rx_lock);
+ if (ret) {
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+ return ret;
+ }
+
rte_service_component_runstate_set(rx_adapter->service_id,
- sw_rx_adapter_queue_count(rx_adapter));
+ rxa_sw_adapter_queue_count(rx_adapter));
}
return ret;
}
-
int
rte_event_eth_rx_adapter_start(uint8_t id)
{
- return rx_adapter_ctrl(id, 1);
+ return rxa_ctrl(id, 1);
}
int
rte_event_eth_rx_adapter_stop(uint8_t id)
{
- return rx_adapter_ctrl(id, 0);
+ return rxa_ctrl(id, 0);
}
int
@@ -1168,13 +2312,13 @@ rte_event_eth_rx_adapter_stats_get(uint8_t id,
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || stats == NULL)
return -EINVAL;
dev = &rte_eventdevs[rx_adapter->eventdev_id];
memset(stats, 0, sizeof(*stats));
- for (i = 0; i < rte_eth_dev_count(); i++) {
+ RTE_ETH_FOREACH_DEV(i) {
dev_info = &rx_adapter->eth_devices[i];
if (dev_info->internal_event_port == 0 ||
dev->dev_ops->eth_rx_adapter_stats_get == NULL)
@@ -1206,12 +2350,12 @@ rte_event_eth_rx_adapter_stats_reset(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
dev = &rte_eventdevs[rx_adapter->eventdev_id];
- for (i = 0; i < rte_eth_dev_count(); i++) {
+ RTE_ETH_FOREACH_DEV(i) {
dev_info = &rx_adapter->eth_devices[i];
if (dev_info->internal_event_port == 0 ||
dev->dev_ops->eth_rx_adapter_stats_reset == NULL)
@@ -1231,7 +2375,7 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || service_id == NULL)
return -EINVAL;
@@ -1240,3 +2384,47 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
return rx_adapter->service_inited ? 0 : -ESRCH;
}
+
+int rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct eth_device_info *dev_info;
+ uint32_t cap;
+ int ret;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+
+ rx_adapter = rxa_id_to_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev_info = &rx_adapter->eth_devices[eth_dev_id];
+ if (dev_info->rx_queue == NULL)
+ return -EINVAL;
+
+ ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id,
+ eth_dev_id,
+ &cap);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8
+ "eth port %" PRIu16, id, eth_dev_id);
+ return ret;
+ }
+
+ if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
+ RTE_EDEV_LOG_ERR("Rx callback not supported for eth port %"
+ PRIu16, eth_dev_id);
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ dev_info->cb_fn = cb_fn;
+ dev_info->cb_arg = cb_arg;
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+
+ return 0;
+}
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
index c20507b2..332ee216 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
@@ -1,32 +1,6 @@
-/*
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation.
+ * All rights reserved.
*/
#ifndef _RTE_EVENT_ETH_RX_ADAPTER_
@@ -47,7 +21,11 @@
*
* The adapter uses a EAL service core function for SW based packet transfer
* and uses the eventdev PMD functions to configure HW based packet transfer
- * between the ethernet device and the event device.
+ * between the ethernet device and the event device. For SW based packet
+ * transfer, if the mbuf does not have a timestamp set, the adapter adds a
+ * timestamp to the mbuf using rte_get_tsc_cycles(); this provides a more
+ * accurate timestamp than one set later by the application, since it avoids
+ * the event device schedule latency.
*
* The ethernet Rx event adapter's functions are:
* - rte_event_eth_rx_adapter_create_ext()
@@ -85,7 +63,23 @@
* rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve
* the service function ID of the adapter in this case.
*
- * Note: Interrupt driven receive queues are currently unimplemented.
+ * For SW based packet transfers, i.e., when the
+ * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is not set in the adapter's
+ * capabilities flags for a particular ethernet device, the service function
+ * temporarily enqueues mbufs to an event buffer before batch enqueueing these
+ * to the event device. If the buffer fills up, the service function stops
+ * dequeueing packets from the ethernet device. The application may want to
+ * monitor the buffer fill level and instruct the service function to
+ * selectively buffer packets. The application may also use some other
+ * criteria to decide which packets should enter the event device even when
+ * the event buffer fill level is low. The
+ * rte_event_eth_rx_adapter_cb_register() function allows the
+ * application to register a callback that selects which packets to enqueue
+ * to the event device.
+ *
+ * Note:
+ * 1) Devices created after an instance of rte_event_eth_rx_adapter_create
+ * should be added to a new instance of the rx adapter.
*/
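
A minimal setup sketch for the SW transfer case described above; the adapter id
RX_ADAPTER_ID, the scheduling parameters and the servicing weight below are placeholder
assumptions for illustration, not requirements of the API:

#include <string.h>
#include <rte_eventdev.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_service.h>

#define RX_ADAPTER_ID 0	/* placeholder adapter id */

static int
setup_rx_adapter(uint8_t ev_dev_id, uint16_t eth_port_id,
		uint8_t ev_queue_id, uint32_t service_lcore,
		struct rte_event_port_conf *port_conf)
{
	struct rte_event_eth_rx_adapter_queue_conf qconf;
	uint32_t service_id;
	int ret;

	ret = rte_event_eth_rx_adapter_create(RX_ADAPTER_ID, ev_dev_id,
					      port_conf);
	if (ret)
		return ret;

	memset(&qconf, 0, sizeof(qconf));
	qconf.ev.queue_id = ev_queue_id;
	qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	/* Non-zero weight selects poll mode; 0 requests interrupt mode
	 * when the ethdev has Rx queue interrupts enabled.
	 */
	qconf.servicing_weight = 1;

	/* rx_queue_id of -1 adds all Rx queues of the port */
	ret = rte_event_eth_rx_adapter_queue_add(RX_ADAPTER_ID, eth_port_id,
						-1, &qconf);
	if (ret)
		return ret;

	/* SW transfer: run the adapter service on a service lcore */
	if (rte_event_eth_rx_adapter_service_id_get(RX_ADAPTER_ID,
						&service_id) == 0)
		rte_service_map_lcore_set(service_id, service_lcore, 1);

	return rte_event_eth_rx_adapter_start(RX_ADAPTER_ID);
}
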
#ifdef __cplusplus
@@ -218,12 +212,55 @@ struct rte_event_eth_rx_adapter_stats {
* block cycles can be used to compute the percentage of
* cycles the service is blocked by the event device.
*/
+ uint64_t rx_intr_packets;
+ /**< Received packet count for interrupt mode Rx queues */
};
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Callback function invoked by the SW adapter before it continues
+ * to process packets. The callback is passed the size of the enqueue
+ * buffer in the SW adapter and the occupancy of the buffer. The
+ * callback can use these values to decide which mbufs should be
+ * enqueued to the event device. If the return value of the callback
+ * is less than nb_mbuf, the SW adapter enqueues that many mbufs from
+ * enq_buf[] to the event device instead of the original mbuf array.
+ *
+ * @param eth_dev_id
+ * Port identifier of the Ethernet device.
+ * @param queue_id
+ * Receive queue index.
+ * @param enqueue_buf_size
+ * Total enqueue buffer size.
+ * @param enqueue_buf_count
+ * mbuf count in enqueue buffer.
+ * @param mbuf
+ * mbuf array.
+ * @param nb_mbuf
+ * mbuf count.
+ * @param cb_arg
+ * Callback argument.
+ * @param[out] enq_buf
+ * The adapter enqueues enq_buf[] if the return value of the
+ * callback is less than nb_mbuf.
+ * @return
+ * Returns the number of mbufs that should be enqueued to the event device.
+ */
+typedef uint16_t (*rte_event_eth_rx_adapter_cb_fn)(uint16_t eth_dev_id,
+ uint16_t queue_id,
+ uint32_t enqueue_buf_size,
+ uint32_t enqueue_buf_count,
+ struct rte_mbuf **mbuf,
+ uint16_t nb_mbuf,
+ void *cb_arg,
+ struct rte_mbuf **enq_buf);
+
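
For illustration only, a callback matching this prototype might thin out traffic once the
adapter's enqueue buffer passes half occupancy; the 50% threshold and the
keep-every-other-packet policy are arbitrary choices for the sketch, and the callback is
assumed to own, and therefore free, any mbufs it filters out (requires <rte_mbuf.h> and
<rte_common.h>):

static uint16_t
rx_adapter_cb(uint16_t eth_dev_id, uint16_t queue_id,
	      uint32_t enqueue_buf_size, uint32_t enqueue_buf_count,
	      struct rte_mbuf **mbuf, uint16_t nb_mbuf,
	      void *cb_arg, struct rte_mbuf **enq_buf)
{
	uint16_t i, n = 0;

	RTE_SET_USED(eth_dev_id);
	RTE_SET_USED(queue_id);
	RTE_SET_USED(cb_arg);

	/* Plenty of room left: let the adapter enqueue mbuf[] unchanged */
	if (enqueue_buf_count < enqueue_buf_size / 2)
		return nb_mbuf;

	/* Buffer is filling up: keep every other packet, free the rest and
	 * return the reduced count so the adapter enqueues enq_buf[] instead.
	 */
	for (i = 0; i < nb_mbuf; i++) {
		if ((i & 1) == 0)
			enq_buf[n++] = mbuf[i];
		else
			rte_pktmbuf_free(mbuf[i]);
	}
	return n;
}

Such a callback would then be attached per ethernet port with
rte_event_eth_rx_adapter_cb_register(id, eth_dev_id, rx_adapter_cb, NULL).
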
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Create a new ethernet Rx event adapter with the specified identifier.
*
* @param id
@@ -329,7 +366,7 @@ int rte_event_eth_rx_adapter_free(uint8_t id);
* combination of the two error codes.
*/
int rte_event_eth_rx_adapter_queue_add(uint8_t id,
- uint8_t eth_dev_id,
+ uint16_t eth_dev_id,
int32_t rx_queue_id,
const struct rte_event_eth_rx_adapter_queue_conf *conf);
@@ -357,7 +394,7 @@ int rte_event_eth_rx_adapter_queue_add(uint8_t id,
* - 0: Success, Receive queue deleted correctly.
* - <0: Error code on failure.
*/
-int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
+int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
int32_t rx_queue_id);
/**
@@ -444,6 +481,32 @@ int rte_event_eth_rx_adapter_stats_reset(uint8_t id);
*/
int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a callback to process Rx packets; this is supported only for
+ * SW based packet transfers.
+ * @see rte_event_eth_rx_adapter_cb_fn
+ *
+ * @param id
+ * Adapter identifier.
+ * @param eth_dev_id
+ * Port identifier of Ethernet device.
+ * @param cb_fn
+ * Callback function.
+ * @param cb_arg
+ * Callback arg.
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
index eb67751d..16d02a95 100644
--- a/lib/librte_eventdev/rte_event_ring.c
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -82,11 +82,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id,
mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
if (mz != NULL) {
r = mz->addr;
- /*
- * no need to check return value here, we already checked the
- * arguments above
- */
- rte_event_ring_init(r, name, requested_count, flags);
+ /* Check return value in case rte_ring_init() fails on size */
+ int err = rte_event_ring_init(r, name, requested_count, flags);
+ if (err) {
+ RTE_LOG(ERR, RING, "Ring init failed\n");
+ if (rte_memzone_free(mz) != 0)
+ RTE_LOG(ERR, RING, "Cannot free memzone\n");
+ rte_free(te);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return NULL;
+ }
te->data = (void *) r;
r->r.memzone = mz;
diff --git a/lib/librte_eventdev/rte_event_ring.h b/lib/librte_eventdev/rte_event_ring.h
index 29d4228a..827a3209 100644
--- a/lib/librte_eventdev/rte_event_ring.h
+++ b/lib/librte_eventdev/rte_event_ring.h
@@ -99,7 +99,7 @@ rte_event_ring_enqueue_burst(struct rte_event_ring *r,
ENQUEUE_PTRS(&r->r, &r[1], prod_head, events, n, struct rte_event);
- update_tail(&r->r.prod, prod_head, prod_next, 1, 1);
+ update_tail(&r->r.prod, prod_head, prod_next, r->r.prod.single, 1);
end:
if (free_space != NULL)
*free_space = free_entries - n;
@@ -140,7 +140,7 @@ rte_event_ring_dequeue_burst(struct rte_event_ring *r,
DEQUEUE_PTRS(&r->r, &r[1], cons_head, events, n, struct rte_event);
- update_tail(&r->r.cons, cons_head, cons_next, 1, 0);
+ update_tail(&r->r.cons, cons_head, cons_next, r->r.cons.single, 0);
end:
if (available != NULL)
diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c b/lib/librte_eventdev/rte_event_timer_adapter.c
new file mode 100644
index 00000000..79070d48
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_timer_adapter.c
@@ -0,0 +1,1299 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_memzone.h>
+#include <rte_memory.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+#include <rte_timer.h>
+#include <rte_service_component.h>
+#include <rte_cycles.h>
+
+#include "rte_eventdev.h"
+#include "rte_eventdev_pmd.h"
+#include "rte_event_timer_adapter.h"
+#include "rte_event_timer_adapter_pmd.h"
+
+#define DATA_MZ_NAME_MAX_LEN 64
+#define DATA_MZ_NAME_FORMAT "rte_event_timer_adapter_data_%d"
+
+static int evtim_logtype;
+static int evtim_svc_logtype;
+static int evtim_buffer_logtype;
+
+static struct rte_event_timer_adapter adapters[RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
+
+static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops;
+
+#define EVTIM_LOG(level, logtype, ...) \
+ rte_log(RTE_LOG_ ## level, logtype, \
+ RTE_FMT("EVTIMER: %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) \
+ "\n", __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
+
+#define EVTIM_LOG_ERR(...) EVTIM_LOG(ERR, evtim_logtype, __VA_ARGS__)
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+#define EVTIM_LOG_DBG(...) \
+ EVTIM_LOG(DEBUG, evtim_logtype, __VA_ARGS__)
+#define EVTIM_BUF_LOG_DBG(...) \
+ EVTIM_LOG(DEBUG, evtim_buffer_logtype, __VA_ARGS__)
+#define EVTIM_SVC_LOG_DBG(...) \
+ EVTIM_LOG(DEBUG, evtim_svc_logtype, __VA_ARGS__)
+#else
+#define EVTIM_LOG_DBG(...) (void)0
+#define EVTIM_BUF_LOG_DBG(...) (void)0
+#define EVTIM_SVC_LOG_DBG(...) (void)0
+#endif
+
+static int
+default_port_conf_cb(uint16_t id, uint8_t event_dev_id, uint8_t *event_port_id,
+ void *conf_arg)
+{
+ struct rte_event_timer_adapter *adapter;
+ struct rte_eventdev *dev;
+ struct rte_event_dev_config dev_conf;
+ struct rte_event_port_conf *port_conf, def_port_conf = {0};
+ int started;
+ uint8_t port_id;
+ uint8_t dev_id;
+ int ret;
+
+ RTE_SET_USED(event_dev_id);
+
+ adapter = &adapters[id];
+ dev = &rte_eventdevs[adapter->data->event_dev_id];
+ dev_id = dev->data->dev_id;
+ dev_conf = dev->data->dev_conf;
+
+ started = dev->data->dev_started;
+ if (started)
+ rte_event_dev_stop(dev_id);
+
+ port_id = dev_conf.nb_event_ports;
+ dev_conf.nb_event_ports += 1;
+ ret = rte_event_dev_configure(dev_id, &dev_conf);
+ if (ret < 0) {
+ EVTIM_LOG_ERR("failed to configure event dev %u\n", dev_id);
+ if (started)
+ if (rte_event_dev_start(dev_id))
+ return -EIO;
+
+ return ret;
+ }
+
+ if (conf_arg != NULL)
+ port_conf = conf_arg;
+ else {
+ port_conf = &def_port_conf;
+ ret = rte_event_port_default_conf_get(dev_id, port_id,
+ port_conf);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = rte_event_port_setup(dev_id, port_id, port_conf);
+ if (ret < 0) {
+ EVTIM_LOG_ERR("failed to setup event port %u on event dev %u\n",
+ port_id, dev_id);
+ return ret;
+ }
+
+ *event_port_id = port_id;
+
+ if (started)
+ ret = rte_event_dev_start(dev_id);
+
+ return ret;
+}
+
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_create(const struct rte_event_timer_adapter_conf *conf)
+{
+ return rte_event_timer_adapter_create_ext(conf, default_port_conf_cb,
+ NULL);
+}
+
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_create_ext(
+ const struct rte_event_timer_adapter_conf *conf,
+ rte_event_timer_adapter_port_conf_cb_t conf_cb,
+ void *conf_arg)
+{
+ uint16_t adapter_id;
+ struct rte_event_timer_adapter *adapter;
+ const struct rte_memzone *mz;
+ char mz_name[DATA_MZ_NAME_MAX_LEN];
+ int n, ret;
+ struct rte_eventdev *dev;
+
+ if (conf == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* Check eventdev ID */
+ if (!rte_event_pmd_is_valid_dev(conf->event_dev_id)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ dev = &rte_eventdevs[conf->event_dev_id];
+
+ adapter_id = conf->timer_adapter_id;
+
+ /* Check that adapter_id is in range */
+ if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* Check adapter ID not already allocated */
+ adapter = &adapters[adapter_id];
+ if (adapter->allocated) {
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ /* Create shared data area. */
+ n = snprintf(mz_name, sizeof(mz_name), DATA_MZ_NAME_FORMAT, adapter_id);
+ if (n >= (int)sizeof(mz_name)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ mz = rte_memzone_reserve(mz_name,
+ sizeof(struct rte_event_timer_adapter_data),
+ conf->socket_id, 0);
+ if (mz == NULL)
+ /* rte_errno set by rte_memzone_reserve */
+ return NULL;
+
+ adapter->data = mz->addr;
+ memset(adapter->data, 0, sizeof(struct rte_event_timer_adapter_data));
+
+ adapter->data->mz = mz;
+ adapter->data->event_dev_id = conf->event_dev_id;
+ adapter->data->id = adapter_id;
+ adapter->data->socket_id = conf->socket_id;
+ adapter->data->conf = *conf; /* copy conf structure */
+
+ /* Query eventdev PMD for timer adapter capabilities and ops */
+ ret = dev->dev_ops->timer_adapter_caps_get(dev,
+ adapter->data->conf.flags,
+ &adapter->data->caps,
+ &adapter->ops);
+ if (ret < 0) {
+ rte_errno = ret;
+ goto free_memzone;
+ }
+
+ if (!(adapter->data->caps &
+ RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) {
+ FUNC_PTR_OR_NULL_RET_WITH_ERRNO(conf_cb, -EINVAL);
+ ret = conf_cb(adapter->data->id, adapter->data->event_dev_id,
+ &adapter->data->event_port_id, conf_arg);
+ if (ret < 0) {
+ rte_errno = ret;
+ goto free_memzone;
+ }
+ }
+
+ /* If eventdev PMD did not provide ops, use default software
+ * implementation.
+ */
+ if (adapter->ops == NULL)
+ adapter->ops = &sw_event_adapter_timer_ops;
+
+ /* Allow driver to do some setup */
+ FUNC_PTR_OR_NULL_RET_WITH_ERRNO(adapter->ops->init, -ENOTSUP);
+ ret = adapter->ops->init(adapter);
+ if (ret < 0) {
+ rte_errno = ret;
+ goto free_memzone;
+ }
+
+ /* Set fast-path function pointers */
+ adapter->arm_burst = adapter->ops->arm_burst;
+ adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst;
+ adapter->cancel_burst = adapter->ops->cancel_burst;
+
+ adapter->allocated = 1;
+
+ return adapter;
+
+free_memzone:
+ rte_memzone_free(adapter->data->mz);
+ return NULL;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_get_info(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_info *adapter_info)
+{
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+
+ if (adapter->ops->get_info)
+ /* let driver set values it knows */
+ adapter->ops->get_info(adapter, adapter_info);
+
+ /* Set common values */
+ adapter_info->conf = adapter->data->conf;
+ adapter_info->event_dev_port_id = adapter->data->event_port_id;
+ adapter_info->caps = adapter->data->caps;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_start(const struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->ops->start, -EINVAL);
+
+ ret = adapter->ops->start(adapter);
+ if (ret < 0)
+ return ret;
+
+ adapter->data->started = 1;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->ops->stop, -EINVAL);
+
+ if (adapter->data->started == 0) {
+ EVTIM_LOG_ERR("event timer adapter %"PRIu8" already stopped",
+ adapter->data->id);
+ return 0;
+ }
+
+ ret = adapter->ops->stop(adapter);
+ if (ret < 0)
+ return ret;
+
+ adapter->data->started = 0;
+
+ return 0;
+}
+
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_lookup(uint16_t adapter_id)
+{
+ char name[DATA_MZ_NAME_MAX_LEN];
+ const struct rte_memzone *mz;
+ struct rte_event_timer_adapter_data *data;
+ struct rte_event_timer_adapter *adapter;
+ int ret;
+ struct rte_eventdev *dev;
+
+ if (adapters[adapter_id].allocated)
+ return &adapters[adapter_id]; /* Adapter is already loaded */
+
+ snprintf(name, DATA_MZ_NAME_MAX_LEN, DATA_MZ_NAME_FORMAT, adapter_id);
+ mz = rte_memzone_lookup(name);
+ if (mz == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ data = mz->addr;
+
+ adapter = &adapters[data->id];
+ adapter->data = data;
+
+ dev = &rte_eventdevs[adapter->data->event_dev_id];
+
+ /* Query eventdev PMD for timer adapter capabilities and ops */
+ ret = dev->dev_ops->timer_adapter_caps_get(dev,
+ adapter->data->conf.flags,
+ &adapter->data->caps,
+ &adapter->ops);
+ if (ret < 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* If eventdev PMD did not provide ops, use default software
+ * implementation.
+ */
+ if (adapter->ops == NULL)
+ adapter->ops = &sw_event_adapter_timer_ops;
+
+ /* Set fast-path function pointers */
+ adapter->arm_burst = adapter->ops->arm_burst;
+ adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst;
+ adapter->cancel_burst = adapter->ops->cancel_burst;
+
+ adapter->allocated = 1;
+
+ return adapter;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_free(struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->ops->uninit, -EINVAL);
+
+ if (adapter->data->started == 1) {
+ EVTIM_LOG_ERR("event timer adapter %"PRIu8" must be stopped "
+ "before freeing", adapter->data->id);
+ return -EBUSY;
+ }
+
+ /* free impl priv data */
+ ret = adapter->ops->uninit(adapter);
+ if (ret < 0)
+ return ret;
+
+ /* free shared data area */
+ ret = rte_memzone_free(adapter->data->mz);
+ if (ret < 0)
+ return ret;
+
+ adapter->data = NULL;
+ adapter->allocated = 0;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter,
+ uint32_t *service_id)
+{
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+
+ if (adapter->data->service_inited && service_id != NULL)
+ *service_id = adapter->data->service_id;
+
+ return adapter->data->service_inited ? 0 : -ESRCH;
+}
+
+int __rte_experimental
+rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_stats *stats)
+{
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->ops->stats_get, -EINVAL);
+ if (stats == NULL)
+ return -EINVAL;
+
+ return adapter->ops->stats_get(adapter, stats);
+}
+
+int __rte_experimental
+rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter)
+{
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->ops->stats_reset, -EINVAL);
+ return adapter->ops->stats_reset(adapter);
+}
+
+/*
+ * Software event timer adapter buffer helper functions
+ */
+
+#define NSECPERSEC 1E9
+
+/* Optimizations used to index into the buffer require that the buffer size
+ * be a power of 2.
+ */
+#define EVENT_BUFFER_SZ 4096
+#define EVENT_BUFFER_BATCHSZ 32
+#define EVENT_BUFFER_MASK (EVENT_BUFFER_SZ - 1)
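+/* Worked example of the power-of-two indexing above: with EVENT_BUFFER_SZ
+ * at 4096, a free-running head value of 4100 maps to slot
+ * 4100 & EVENT_BUFFER_MASK = 4, the same result as 4100 % 4096 but without
+ * a divide. Because head and tail are plain uint16_t counters that wrap
+ * naturally, (head - tail) always gives the current fill level.
+ */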
+
+struct event_buffer {
+ uint16_t head;
+ uint16_t tail;
+ struct rte_event events[EVENT_BUFFER_SZ];
+} __rte_cache_aligned;
+
+static inline bool
+event_buffer_full(struct event_buffer *bufp)
+{
+ return (bufp->head - bufp->tail) == EVENT_BUFFER_SZ;
+}
+
+static inline bool
+event_buffer_batch_ready(struct event_buffer *bufp)
+{
+ return (bufp->head - bufp->tail) >= EVENT_BUFFER_BATCHSZ;
+}
+
+static void
+event_buffer_init(struct event_buffer *bufp)
+{
+ bufp->head = bufp->tail = 0;
+ memset(&bufp->events, 0, sizeof(struct rte_event) * EVENT_BUFFER_SZ);
+}
+
+static int
+event_buffer_add(struct event_buffer *bufp, struct rte_event *eventp)
+{
+ uint16_t head_idx;
+ struct rte_event *buf_eventp;
+
+ if (event_buffer_full(bufp))
+ return -1;
+
+ /* Instead of modulus, bitwise AND with mask to get head_idx. */
+ head_idx = bufp->head & EVENT_BUFFER_MASK;
+ buf_eventp = &bufp->events[head_idx];
+ rte_memcpy(buf_eventp, eventp, sizeof(struct rte_event));
+
+ /* Wrap automatically when overflow occurs. */
+ bufp->head++;
+
+ return 0;
+}
+
+static void
+event_buffer_flush(struct event_buffer *bufp, uint8_t dev_id, uint8_t port_id,
+ uint16_t *nb_events_flushed,
+ uint16_t *nb_events_inv)
+{
+ uint16_t head_idx, tail_idx, n = 0;
+ struct rte_event *events = bufp->events;
+
+ /* Instead of modulus, bitwise AND with mask to get index. */
+ head_idx = bufp->head & EVENT_BUFFER_MASK;
+ tail_idx = bufp->tail & EVENT_BUFFER_MASK;
+
+ /* Determine the largest contiguous run we can attempt to enqueue to the
+ * event device.
+ */
+ if (head_idx > tail_idx)
+ n = head_idx - tail_idx;
+ else if (head_idx < tail_idx)
+ n = EVENT_BUFFER_SZ - tail_idx;
+ else {
+ *nb_events_flushed = 0;
+ return;
+ }
+
+ *nb_events_inv = 0;
+ *nb_events_flushed = rte_event_enqueue_burst(dev_id, port_id,
+ &events[tail_idx], n);
+ if (*nb_events_flushed != n && rte_errno == -EINVAL) {
+ EVTIM_LOG_ERR("failed to enqueue invalid event - dropping it");
+ (*nb_events_inv)++;
+ }
+
+ bufp->tail = bufp->tail + *nb_events_flushed + *nb_events_inv;
+}
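+
+/* Worked example of the contiguous-run logic above: with tail_idx = 4094 and
+ * head_idx = 2, only the two events at indices 4094 and 4095 are enqueued in
+ * this call (n = EVENT_BUFFER_SZ - tail_idx); the events that wrapped to the
+ * start of the array are picked up by the next flush.
+ */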
+
+/*
+ * Software event timer adapter implementation
+ */
+
+struct rte_event_timer_adapter_sw_data {
+ /* List of messages for outstanding timers */
+ TAILQ_HEAD(, msg) msgs_tailq_head;
+ /* Lock to guard tailq and armed count */
+ rte_spinlock_t msgs_tailq_sl;
+ /* Identifier of service executing timer management logic. */
+ uint32_t service_id;
+ /* The cycle count at which the adapter should next tick */
+ uint64_t next_tick_cycles;
+ /* Incremented as the service moves through phases of an iteration */
+ volatile int service_phase;
+ /* The tick resolution used by adapter instance. May have been
+ * adjusted from what user requested
+ */
+ uint64_t timer_tick_ns;
+ /* Maximum timeout in nanoseconds allowed by adapter instance. */
+ uint64_t max_tmo_ns;
+ /* Ring containing messages to arm or cancel event timers */
+ struct rte_ring *msg_ring;
+ /* Mempool containing msg objects */
+ struct rte_mempool *msg_pool;
+ /* Buffered timer expiry events to be enqueued to an event device. */
+ struct event_buffer buffer;
+ /* Statistics */
+ struct rte_event_timer_adapter_stats stats;
+ /* The number of threads currently adding to the message ring */
+ rte_atomic16_t message_producer_count;
+};
+
+enum msg_type {MSG_TYPE_ARM, MSG_TYPE_CANCEL};
+
+struct msg {
+ enum msg_type type;
+ struct rte_event_timer *evtim;
+ struct rte_timer tim;
+ TAILQ_ENTRY(msg) msgs;
+};
+
+static void
+sw_event_timer_cb(struct rte_timer *tim, void *arg)
+{
+ int ret;
+ uint16_t nb_evs_flushed = 0;
+ uint16_t nb_evs_invalid = 0;
+ uint64_t opaque;
+ struct rte_event_timer *evtim;
+ struct rte_event_timer_adapter *adapter;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+
+ evtim = arg;
+ opaque = evtim->impl_opaque[1];
+ adapter = (struct rte_event_timer_adapter *)(uintptr_t)opaque;
+ sw_data = adapter->data->adapter_priv;
+
+ ret = event_buffer_add(&sw_data->buffer, &evtim->ev);
+ if (ret < 0) {
+ /* If event buffer is full, put timer back in list with
+ * immediate expiry value, so that we process it again on the
+ * next iteration.
+ */
+ rte_timer_reset_sync(tim, 0, SINGLE, rte_lcore_id(),
+ sw_event_timer_cb, evtim);
+
+ sw_data->stats.evtim_retry_count++;
+ EVTIM_LOG_DBG("event buffer full, resetting rte_timer with "
+ "immediate expiry value");
+ } else {
+ struct msg *m = container_of(tim, struct msg, tim);
+ TAILQ_REMOVE(&sw_data->msgs_tailq_head, m, msgs);
+ EVTIM_BUF_LOG_DBG("buffered an event timer expiry event");
+ evtim->state = RTE_EVENT_TIMER_NOT_ARMED;
+
+ /* Free the msg object containing the rte_timer now that
+ * we've buffered its event successfully.
+ */
+ rte_mempool_put(sw_data->msg_pool, m);
+
+ /* Bump the count when we successfully add an expiry event to
+ * the buffer.
+ */
+ sw_data->stats.evtim_exp_count++;
+ }
+
+ if (event_buffer_batch_ready(&sw_data->buffer)) {
+ event_buffer_flush(&sw_data->buffer,
+ adapter->data->event_dev_id,
+ adapter->data->event_port_id,
+ &nb_evs_flushed,
+ &nb_evs_invalid);
+
+ sw_data->stats.ev_enq_count += nb_evs_flushed;
+ sw_data->stats.ev_inv_count += nb_evs_invalid;
+ }
+}
+
+static __rte_always_inline uint64_t
+get_timeout_cycles(struct rte_event_timer *evtim,
+ struct rte_event_timer_adapter *adapter)
+{
+ uint64_t timeout_ns;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+
+ sw_data = adapter->data->adapter_priv;
+ timeout_ns = evtim->timeout_ticks * sw_data->timer_tick_ns;
+ return timeout_ns * rte_get_timer_hz() / NSECPERSEC;
+
+}
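+
+/* Worked example, assuming a 2 GHz timer clock: with timer_tick_ns of
+ * 100000000 (100 ms) and timeout_ticks of 30, timeout_ns is 3000000000 ns,
+ * which converts to 3000000000 * 2000000000 / 1E9 = 6000000000 cycles.
+ * NSECPERSEC is a double, so this conversion is done in floating point.
+ */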
+
+/* This function returns true if one or more (adapter) ticks have occurred since
+ * the last time it was called.
+ */
+static inline bool
+adapter_did_tick(struct rte_event_timer_adapter *adapter)
+{
+ uint64_t cycles_per_adapter_tick, start_cycles;
+ uint64_t *next_tick_cyclesp;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+
+ sw_data = adapter->data->adapter_priv;
+ next_tick_cyclesp = &sw_data->next_tick_cycles;
+
+ cycles_per_adapter_tick = sw_data->timer_tick_ns *
+ (rte_get_timer_hz() / NSECPERSEC);
+
+ start_cycles = rte_get_timer_cycles();
+
+ /* Note: initially, *next_tick_cyclesp == 0, so the clause below will
+ * execute, and set things going.
+ */
+
+ if (start_cycles >= *next_tick_cyclesp) {
+ /* Snap the current cycle count to the preceding adapter tick
+ * boundary.
+ */
+ start_cycles -= start_cycles % cycles_per_adapter_tick;
+
+ *next_tick_cyclesp = start_cycles + cycles_per_adapter_tick;
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Check that event timer timeout value is in range */
+static __rte_always_inline int
+check_timeout(struct rte_event_timer *evtim,
+ const struct rte_event_timer_adapter *adapter)
+{
+ uint64_t tmo_nsec;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+
+ sw_data = adapter->data->adapter_priv;
+ tmo_nsec = evtim->timeout_ticks * sw_data->timer_tick_ns;
+
+ if (tmo_nsec > sw_data->max_tmo_ns)
+ return -1;
+
+ if (tmo_nsec < sw_data->timer_tick_ns)
+ return -2;
+
+ return 0;
+}
+
+/* Check that event timer event queue sched type matches destination event queue
+ * sched type
+ */
+static __rte_always_inline int
+check_destination_event_queue(struct rte_event_timer *evtim,
+ const struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+ uint32_t sched_type;
+
+ ret = rte_event_queue_attr_get(adapter->data->event_dev_id,
+ evtim->ev.queue_id,
+ RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE,
+ &sched_type);
+
+ if ((ret < 0 && ret != -EOVERFLOW) ||
+ evtim->ev.sched_type != sched_type)
+ return -1;
+
+ return 0;
+}
+
+#define NB_OBJS 32
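+/* The service function below advances sw_data->service_phase through three
+ * values per iteration: 1 while it drains the message ring, 2 while
+ * rte_timer_manage() may be firing callbacks that modify event timer state,
+ * and 0 once the iteration is complete. Arm and cancel callers spin while
+ * the phase is 2, and the stop path waits for the phase to return to 0.
+ */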
+static int
+sw_event_timer_adapter_service_func(void *arg)
+{
+ int i, num_msgs;
+ uint64_t cycles, opaque;
+ uint16_t nb_evs_flushed = 0;
+ uint16_t nb_evs_invalid = 0;
+ struct rte_event_timer_adapter *adapter;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ struct rte_event_timer *evtim = NULL;
+ struct rte_timer *tim = NULL;
+ struct msg *msg, *msgs[NB_OBJS];
+
+ adapter = arg;
+ sw_data = adapter->data->adapter_priv;
+
+ sw_data->service_phase = 1;
+ rte_smp_wmb();
+
+ while (rte_atomic16_read(&sw_data->message_producer_count) > 0 ||
+ !rte_ring_empty(sw_data->msg_ring)) {
+
+ num_msgs = rte_ring_dequeue_burst(sw_data->msg_ring,
+ (void **)msgs, NB_OBJS, NULL);
+
+ for (i = 0; i < num_msgs; i++) {
+ int ret = 0;
+
+ RTE_SET_USED(ret);
+
+ msg = msgs[i];
+ evtim = msg->evtim;
+
+ switch (msg->type) {
+ case MSG_TYPE_ARM:
+ EVTIM_SVC_LOG_DBG("dequeued ARM message from "
+ "ring");
+ tim = &msg->tim;
+ rte_timer_init(tim);
+ cycles = get_timeout_cycles(evtim,
+ adapter);
+ ret = rte_timer_reset(tim, cycles, SINGLE,
+ rte_lcore_id(),
+ sw_event_timer_cb,
+ evtim);
+ RTE_ASSERT(ret == 0);
+
+ evtim->impl_opaque[0] = (uintptr_t)tim;
+ evtim->impl_opaque[1] = (uintptr_t)adapter;
+
+ TAILQ_INSERT_TAIL(&sw_data->msgs_tailq_head,
+ msg,
+ msgs);
+ break;
+ case MSG_TYPE_CANCEL:
+ EVTIM_SVC_LOG_DBG("dequeued CANCEL message "
+ "from ring");
+ opaque = evtim->impl_opaque[0];
+ tim = (struct rte_timer *)(uintptr_t)opaque;
+ RTE_ASSERT(tim != NULL);
+
+ ret = rte_timer_stop(tim);
+ RTE_ASSERT(ret == 0);
+
+ /* Free the msg object for the original arm
+ * request.
+ */
+ struct msg *m;
+ m = container_of(tim, struct msg, tim);
+ TAILQ_REMOVE(&sw_data->msgs_tailq_head, m,
+ msgs);
+ rte_mempool_put(sw_data->msg_pool, m);
+
+ /* Free the msg object for the current msg */
+ rte_mempool_put(sw_data->msg_pool, msg);
+
+ evtim->impl_opaque[0] = 0;
+ evtim->impl_opaque[1] = 0;
+
+ break;
+ }
+ }
+ }
+
+ sw_data->service_phase = 2;
+ rte_smp_wmb();
+
+ if (adapter_did_tick(adapter)) {
+ rte_timer_manage();
+
+ event_buffer_flush(&sw_data->buffer,
+ adapter->data->event_dev_id,
+ adapter->data->event_port_id,
+ &nb_evs_flushed, &nb_evs_invalid);
+
+ sw_data->stats.ev_enq_count += nb_evs_flushed;
+ sw_data->stats.ev_inv_count += nb_evs_invalid;
+ sw_data->stats.adapter_tick_count++;
+ }
+
+ sw_data->service_phase = 0;
+ rte_smp_wmb();
+
+ return 0;
+}
+
+/* The adapter initialization function rounds the mempool size up to the next
+ * power of 2, so we can take the difference between that value and what the
+ * user requested, and use the space for caches. This avoids a scenario where a
+ * user can't arm the number of timers the adapter was configured with because
+ * mempool objects have been lost to caches.
+ *
+ * nb_actual should always be a power of 2, so we can iterate over the powers
+ * of 2 to see what the largest cache size we can use is.
+ */
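+/* Worked example, assuming the default RTE_MAX_LCORE of 128 and
+ * RTE_MEMPOOL_CACHE_MAX_SIZE of 512: a request for 10000 timers is rounded
+ * up to 16384, leaving 6384 spare objects. The largest power of two that
+ * keeps RTE_MAX_LCORE * size below 6384 is 32, so each lcore can cache 32
+ * objects without eating into the 10000 the user asked for.
+ */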
+static int
+compute_msg_mempool_cache_size(uint64_t nb_requested, uint64_t nb_actual)
+{
+ int i;
+ int size;
+ int cache_size = 0;
+
+ for (i = 0; ; i++) {
+ size = 1 << i;
+
+ if (RTE_MAX_LCORE * size < (int)(nb_actual - nb_requested) &&
+ size < RTE_MEMPOOL_CACHE_MAX_SIZE &&
+ size <= nb_actual / 1.5)
+ cache_size = size;
+ else
+ break;
+ }
+
+ return cache_size;
+}
+
+#define SW_MIN_INTERVAL 1E5
+
+static int
+sw_event_timer_adapter_init(struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ uint64_t nb_timers;
+ unsigned int flags;
+ struct rte_service_spec service;
+ static bool timer_subsystem_inited; // static initialized to false
+
+ /* Allocate storage for SW implementation data */
+ char priv_data_name[RTE_RING_NAMESIZE];
+ snprintf(priv_data_name, RTE_RING_NAMESIZE, "sw_evtim_adap_priv_%"PRIu8,
+ adapter->data->id);
+ adapter->data->adapter_priv = rte_zmalloc_socket(
+ priv_data_name,
+ sizeof(struct rte_event_timer_adapter_sw_data),
+ RTE_CACHE_LINE_SIZE,
+ adapter->data->socket_id);
+ if (adapter->data->adapter_priv == NULL) {
+ EVTIM_LOG_ERR("failed to allocate space for private data");
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ if (adapter->data->conf.timer_tick_ns < SW_MIN_INTERVAL) {
+ EVTIM_LOG_ERR("failed to create adapter with requested tick "
+ "interval");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ sw_data = adapter->data->adapter_priv;
+
+ sw_data->timer_tick_ns = adapter->data->conf.timer_tick_ns;
+ sw_data->max_tmo_ns = adapter->data->conf.max_tmo_ns;
+
+ TAILQ_INIT(&sw_data->msgs_tailq_head);
+ rte_spinlock_init(&sw_data->msgs_tailq_sl);
+ rte_atomic16_init(&sw_data->message_producer_count);
+
+ /* Rings require power of 2, so round up to next such value */
+ nb_timers = rte_align64pow2(adapter->data->conf.nb_timers);
+
+ char msg_ring_name[RTE_RING_NAMESIZE];
+ snprintf(msg_ring_name, RTE_RING_NAMESIZE,
+ "sw_evtim_adap_msg_ring_%"PRIu8, adapter->data->id);
+ flags = adapter->data->conf.flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT ?
+ RING_F_SP_ENQ | RING_F_SC_DEQ :
+ RING_F_SC_DEQ;
+ sw_data->msg_ring = rte_ring_create(msg_ring_name, nb_timers,
+ adapter->data->socket_id, flags);
+ if (sw_data->msg_ring == NULL) {
+ EVTIM_LOG_ERR("failed to create message ring");
+ rte_errno = ENOMEM;
+ goto free_priv_data;
+ }
+
+ char pool_name[RTE_RING_NAMESIZE];
+ snprintf(pool_name, RTE_RING_NAMESIZE, "sw_evtim_adap_msg_pool_%"PRIu8,
+ adapter->data->id);
+
+ /* Both the arming/canceling thread and the service thread will do puts
+ * to the mempool, but if the SP_PUT flag is enabled, we can specify
+ * single-consumer get for the mempool.
+ */
+ flags = adapter->data->conf.flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT ?
+ MEMPOOL_F_SC_GET : 0;
+
+ /* The usable size of a ring is count - 1, so subtract one here to
+ * make the counts agree.
+ */
+ int pool_size = nb_timers - 1;
+ int cache_size = compute_msg_mempool_cache_size(
+ adapter->data->conf.nb_timers, nb_timers);
+ sw_data->msg_pool = rte_mempool_create(pool_name, pool_size,
+ sizeof(struct msg), cache_size,
+ 0, NULL, NULL, NULL, NULL,
+ adapter->data->socket_id, flags);
+ if (sw_data->msg_pool == NULL) {
+ EVTIM_LOG_ERR("failed to create message object mempool");
+ rte_errno = ENOMEM;
+ goto free_msg_ring;
+ }
+
+ event_buffer_init(&sw_data->buffer);
+
+ /* Register a service component to run adapter logic */
+ memset(&service, 0, sizeof(service));
+ snprintf(service.name, RTE_SERVICE_NAME_MAX,
+ "sw_evimer_adap_svc_%"PRIu8, adapter->data->id);
+ service.socket_id = adapter->data->socket_id;
+ service.callback = sw_event_timer_adapter_service_func;
+ service.callback_userdata = adapter;
+ service.capabilities &= ~(RTE_SERVICE_CAP_MT_SAFE);
+ ret = rte_service_component_register(&service, &sw_data->service_id);
+ if (ret < 0) {
+ EVTIM_LOG_ERR("failed to register service %s with id %"PRIu32
+ ": err = %d", service.name, sw_data->service_id,
+ ret);
+
+ rte_errno = ENOSPC;
+ goto free_msg_pool;
+ }
+
+ EVTIM_LOG_DBG("registered service %s with id %"PRIu32, service.name,
+ sw_data->service_id);
+
+ adapter->data->service_id = sw_data->service_id;
+ adapter->data->service_inited = 1;
+
+ if (!timer_subsystem_inited) {
+ rte_timer_subsystem_init();
+ timer_subsystem_inited = true;
+ }
+
+ return 0;
+
+free_msg_pool:
+ rte_mempool_free(sw_data->msg_pool);
+free_msg_ring:
+ rte_ring_free(sw_data->msg_ring);
+free_priv_data:
+ rte_free(sw_data);
+ return -1;
+}
+
+static int
+sw_event_timer_adapter_uninit(struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+ struct msg *m1, *m2;
+ struct rte_event_timer_adapter_sw_data *sw_data =
+ adapter->data->adapter_priv;
+
+ rte_spinlock_lock(&sw_data->msgs_tailq_sl);
+
+ /* Cancel outstanding rte_timers and free msg objects */
+ m1 = TAILQ_FIRST(&sw_data->msgs_tailq_head);
+ while (m1 != NULL) {
+ EVTIM_LOG_DBG("freeing outstanding timer");
+ m2 = TAILQ_NEXT(m1, msgs);
+
+ rte_timer_stop_sync(&m1->tim);
+ rte_mempool_put(sw_data->msg_pool, m1);
+
+ m1 = m2;
+ }
+
+ rte_spinlock_unlock(&sw_data->msgs_tailq_sl);
+
+ ret = rte_service_component_unregister(sw_data->service_id);
+ if (ret < 0) {
+ EVTIM_LOG_ERR("failed to unregister service component");
+ return ret;
+ }
+
+ rte_ring_free(sw_data->msg_ring);
+ rte_mempool_free(sw_data->msg_pool);
+ rte_free(adapter->data->adapter_priv);
+
+ return 0;
+}
+
+static inline int32_t
+get_mapped_count_for_service(uint32_t service_id)
+{
+ int32_t core_count, i, mapped_count = 0;
+ uint32_t lcore_arr[RTE_MAX_LCORE];
+
+ core_count = rte_service_lcore_list(lcore_arr, RTE_MAX_LCORE);
+
+ for (i = 0; i < core_count; i++)
+ if (rte_service_map_lcore_get(service_id, lcore_arr[i]) == 1)
+ mapped_count++;
+
+ return mapped_count;
+}
+
+static int
+sw_event_timer_adapter_start(const struct rte_event_timer_adapter *adapter)
+{
+ int mapped_count;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+
+ sw_data = adapter->data->adapter_priv;
+
+ /* Mapping the service to more than one service core can introduce
+ * delays while one thread is waiting to acquire a lock, so only allow
+ * one core to be mapped to the service.
+ */
+ mapped_count = get_mapped_count_for_service(sw_data->service_id);
+
+ if (mapped_count == 1)
+ return rte_service_component_runstate_set(sw_data->service_id,
+ 1);
+
+ return mapped_count < 1 ? -ENOENT : -ENOTSUP;
+}
+
+static int
+sw_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter)
+{
+ int ret;
+ struct rte_event_timer_adapter_sw_data *sw_data =
+ adapter->data->adapter_priv;
+
+ ret = rte_service_component_runstate_set(sw_data->service_id, 0);
+ if (ret < 0)
+ return ret;
+
+ /* Wait for the service to complete its final iteration before
+ * stopping.
+ */
+ while (sw_data->service_phase != 0)
+ rte_pause();
+
+ rte_smp_rmb();
+
+ return 0;
+}
+
+static void
+sw_event_timer_adapter_get_info(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_info *adapter_info)
+{
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ sw_data = adapter->data->adapter_priv;
+
+ adapter_info->min_resolution_ns = sw_data->timer_tick_ns;
+ adapter_info->max_tmo_ns = sw_data->max_tmo_ns;
+}
+
+static int
+sw_event_timer_adapter_stats_get(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_stats *stats)
+{
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ sw_data = adapter->data->adapter_priv;
+ *stats = sw_data->stats;
+ return 0;
+}
+
+static int
+sw_event_timer_adapter_stats_reset(
+ const struct rte_event_timer_adapter *adapter)
+{
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ sw_data = adapter->data->adapter_priv;
+ memset(&sw_data->stats, 0, sizeof(sw_data->stats));
+ return 0;
+}
+
+static __rte_always_inline uint16_t
+__sw_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint16_t nb_evtims)
+{
+ uint16_t i;
+ int ret;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ struct msg *msgs[nb_evtims];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ /* Check that the service is running. */
+ if (rte_service_runstate_get(adapter->data->service_id) != 1) {
+ rte_errno = EINVAL;
+ return 0;
+ }
+#endif
+
+ sw_data = adapter->data->adapter_priv;
+
+ ret = rte_mempool_get_bulk(sw_data->msg_pool, (void **)msgs, nb_evtims);
+ if (ret < 0) {
+ rte_errno = ENOSPC;
+ return 0;
+ }
+
+ /* Let the service know we're producing messages for it to process */
+ rte_atomic16_inc(&sw_data->message_producer_count);
+
+ /* If the service is managing timers, wait for it to finish */
+ while (sw_data->service_phase == 2)
+ rte_pause();
+
+ rte_smp_rmb();
+
+ for (i = 0; i < nb_evtims; i++) {
+ /* Don't modify the event timer state in these cases */
+ if (evtims[i]->state == RTE_EVENT_TIMER_ARMED) {
+ rte_errno = EALREADY;
+ break;
+ } else if (!(evtims[i]->state == RTE_EVENT_TIMER_NOT_ARMED ||
+ evtims[i]->state == RTE_EVENT_TIMER_CANCELED)) {
+ rte_errno = EINVAL;
+ break;
+ }
+
+ ret = check_timeout(evtims[i], adapter);
+ if (ret == -1) {
+ evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
+ rte_errno = EINVAL;
+ break;
+ }
+ if (ret == -2) {
+ evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
+ rte_errno = EINVAL;
+ break;
+ }
+
+ if (check_destination_event_queue(evtims[i], adapter) < 0) {
+ evtims[i]->state = RTE_EVENT_TIMER_ERROR;
+ rte_errno = EINVAL;
+ break;
+ }
+
+ /* Checks passed, set up a message to enqueue */
+ msgs[i]->type = MSG_TYPE_ARM;
+ msgs[i]->evtim = evtims[i];
+
+ /* Set the payload pointer if not set. */
+ if (evtims[i]->ev.event_ptr == NULL)
+ evtims[i]->ev.event_ptr = evtims[i];
+
+ /* msg objects that get enqueued successfully will be freed
+ * either by a future cancel operation or by the timer
+ * expiration callback.
+ */
+ if (rte_ring_enqueue(sw_data->msg_ring, msgs[i]) < 0) {
+ rte_errno = ENOSPC;
+ break;
+ }
+
+ EVTIM_LOG_DBG("enqueued ARM message to ring");
+
+ evtims[i]->state = RTE_EVENT_TIMER_ARMED;
+ }
+
+ /* Let the service know we're done producing messages */
+ rte_atomic16_dec(&sw_data->message_producer_count);
+
+ if (i < nb_evtims)
+ rte_mempool_put_bulk(sw_data->msg_pool, (void **)&msgs[i],
+ nb_evtims - i);
+
+ return i;
+}
+
+static uint16_t
+sw_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint16_t nb_evtims)
+{
+ return __sw_event_timer_arm_burst(adapter, evtims, nb_evtims);
+}
+
+static uint16_t
+sw_event_timer_cancel_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint16_t nb_evtims)
+{
+ uint16_t i;
+ int ret;
+ struct rte_event_timer_adapter_sw_data *sw_data;
+ struct msg *msgs[nb_evtims];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ /* Check that the service is running. */
+ if (rte_service_runstate_get(adapter->data->service_id) != 1) {
+ rte_errno = EINVAL;
+ return 0;
+ }
+#endif
+
+ sw_data = adapter->data->adapter_priv;
+
+ ret = rte_mempool_get_bulk(sw_data->msg_pool, (void **)msgs, nb_evtims);
+ if (ret < 0) {
+ rte_errno = ENOSPC;
+ return 0;
+ }
+
+ /* Let the service know we're producing messages for it to process */
+ rte_atomic16_inc(&sw_data->message_producer_count);
+
+ /* If the service could be modifying event timer states, wait */
+ while (sw_data->service_phase == 2)
+ rte_pause();
+
+ rte_smp_rmb();
+
+ for (i = 0; i < nb_evtims; i++) {
+ /* Don't modify the event timer state in these cases */
+ if (evtims[i]->state == RTE_EVENT_TIMER_CANCELED) {
+ rte_errno = EALREADY;
+ break;
+ } else if (evtims[i]->state != RTE_EVENT_TIMER_ARMED) {
+ rte_errno = EINVAL;
+ break;
+ }
+
+ msgs[i]->type = MSG_TYPE_CANCEL;
+ msgs[i]->evtim = evtims[i];
+
+ if (rte_ring_enqueue(sw_data->msg_ring, msgs[i]) < 0) {
+ rte_errno = ENOSPC;
+ break;
+ }
+
+ EVTIM_LOG_DBG("enqueued CANCEL message to ring");
+
+ evtims[i]->state = RTE_EVENT_TIMER_CANCELED;
+ }
+
+ /* Let the service know we're done producing messages */
+ rte_atomic16_dec(&sw_data->message_producer_count);
+
+ if (i < nb_evtims)
+ rte_mempool_put_bulk(sw_data->msg_pool, (void **)&msgs[i],
+ nb_evtims - i);
+
+ return i;
+}
+
+static uint16_t
+sw_event_timer_arm_tmo_tick_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint64_t timeout_ticks,
+ uint16_t nb_evtims)
+{
+ int i;
+
+ for (i = 0; i < nb_evtims; i++)
+ evtims[i]->timeout_ticks = timeout_ticks;
+
+ return __sw_event_timer_arm_burst(adapter, evtims, nb_evtims);
+}
+
+static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops = {
+ .init = sw_event_timer_adapter_init,
+ .uninit = sw_event_timer_adapter_uninit,
+ .start = sw_event_timer_adapter_start,
+ .stop = sw_event_timer_adapter_stop,
+ .get_info = sw_event_timer_adapter_get_info,
+ .stats_get = sw_event_timer_adapter_stats_get,
+ .stats_reset = sw_event_timer_adapter_stats_reset,
+ .arm_burst = sw_event_timer_arm_burst,
+ .arm_tmo_tick_burst = sw_event_timer_arm_tmo_tick_burst,
+ .cancel_burst = sw_event_timer_cancel_burst,
+};
+
+RTE_INIT(event_timer_adapter_init_log)
+{
+ evtim_logtype = rte_log_register("lib.eventdev.adapter.timer");
+ if (evtim_logtype >= 0)
+ rte_log_set_level(evtim_logtype, RTE_LOG_NOTICE);
+
+ evtim_buffer_logtype = rte_log_register("lib.eventdev.adapter.timer."
+ "buffer");
+ if (evtim_buffer_logtype >= 0)
+ rte_log_set_level(evtim_buffer_logtype, RTE_LOG_NOTICE);
+
+ evtim_svc_logtype = rte_log_register("lib.eventdev.adapter.timer.svc");
+ if (evtim_svc_logtype >= 0)
+ rte_log_set_level(evtim_svc_logtype, RTE_LOG_NOTICE);
+}
diff --git a/lib/librte_eventdev/rte_event_timer_adapter.h b/lib/librte_eventdev/rte_event_timer_adapter.h
new file mode 100644
index 00000000..d4ea6f17
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_timer_adapter.h
@@ -0,0 +1,766 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc.
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef __RTE_EVENT_TIMER_ADAPTER_H__
+#define __RTE_EVENT_TIMER_ADAPTER_H__
+
+/**
+ * @file
+ *
+ * RTE Event Timer Adapter
+ *
+ * An event timer adapter has the following abstract working model:
+ *
+ * timer_tick_ns
+ * +
+ * +-------+ |
+ * | | |
+ * +-------+ bkt 0 +----v---+
+ * | | | |
+ * | +-------+ |
+ * +---+---+ +---+---+ +---+---+---+---+
+ * | | | | | | | | |
+ * | bkt n | | bkt 1 |<-> t0| t1| t2| tn|
+ * | | | | | | | | |
+ * +---+---+ +---+---+ +---+---+---+---+
+ * | Timer adapter |
+ * +---+---+ +---+---+
+ * | | | |
+ * | bkt 4 | | bkt 2 |<--- Current bucket
+ * | | | |
+ * +---+---+ +---+---+
+ * | +-------+ |
+ * | | | |
+ * +------+ bkt 3 +-------+
+ * | |
+ * +-------+
+ *
+ * - It has a virtual monotonically increasing 64-bit timer adapter clock based
+ * on *enum rte_event_timer_adapter_clk_src* clock source. The clock source
+ * could be a CPU clock, or a platform dependent external clock.
+ *
+ * - The application creates a timer adapter instance with a given clock
+ *   source, the total number of event timers, and a resolution (expressed in
+ *   ns) used to traverse between the buckets.
+ *
+ * - Each timer adapter may have 0 to n buckets based on the configured
+ *   max timeout (max_tmo_ns) and resolution (timer_tick_ns). Upon starting the
+ * timer adapter, the adapter starts ticking at *timer_tick_ns* resolution.
+ *
+ * - The application arms an event timer that will expire a given number of
+ *   *timer_tick_ns* ticks from now.
+ *
+ * - The application can cancel an armed timer and no timer expiry event will be
+ * generated.
+ *
+ * - If a timer expires then the library injects the timer expiry event in
+ * the designated event queue.
+ *
+ * - The timer expiry event will be received through *rte_event_dequeue_burst*.
+ *
+ * - The application frees the timer adapter instance.
+ *
+ * Multiple timer adapters can be created with a varying level of resolution
+ * for various expiry use cases that run in parallel.
+ *
+ * Before using the timer adapter, the application has to create and configure
+ * an event device along with the event port. Based on the event device
+ * capability it might require creating an additional event port to be used
+ * by the timer adapter.
+ *
+ * The application creates the event timer adapter using
+ * ``rte_event_timer_adapter_create()``. The event device id is passed to this
+ * function; inside this function the event device capability is checked,
+ * and if an in-built port is absent, a default callback is used to create a
+ * new producer port.
+ *
+ * The application may also use the function
+ * ``rte_event_timer_adapter_create_ext()`` to have granular control over
+ * producer port creation in a case where the in-built port is absent.
+ *
+ * After creating the timer adapter, the application has to start it
+ * using ``rte_event_timer_adapter_start()``. The buckets are traversed from
+ * 0 to n; when the adapter ticks, the next bucket is visited. Each time,
+ * the list per bucket is processed, and timer expiry events are sent to the
+ * designated event queue.
+ *
+ * The application can arm one or more event timers using the
+ * ``rte_event_timer_arm_burst()``. The *timeout_ticks* value represents the
+ * number of *timer_tick_ns* ticks after which the timer has to expire. The
+ * timeout at which the timers expire can either be common to a group of
+ * timers or be independent for each event timer instance;
+ * ``rte_event_timer_arm_tmo_tick_burst()`` addresses the former case and
+ * ``rte_event_timer_arm_burst()`` addresses the latter.
+ *
+ * The application can cancel armed timers, preventing their expiry events
+ * from being generated, using ``rte_event_timer_cancel_burst()``.
+ *
+ * In a secondary process, ``rte_event_timer_adapter_lookup()`` can be used to
+ * retrieve the timer adapter pointer from its id, which can then be used to
+ * invoke fast-path operations such as arm and cancel.
+ *
+ * Some of the use cases of event timer adapter are Beacon Timers,
+ * Generic SW Timeout, Wireless MAC Scheduling, 3G Frame Protocols,
+ * Packet Scheduling, Protocol Retransmission Timers, Supervision Timers.
+ * All these use cases require high resolution and low time drift.
+ */
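+
+/*
+ * Minimal usage sketch of the flow described above (error handling omitted;
+ * dev_id, queue_id and the service core setup are assumed to exist elsewhere
+ * in the application):
+ *
+ *	struct rte_event_timer_adapter_conf conf = {
+ *		.event_dev_id = dev_id,
+ *		.timer_adapter_id = 0,
+ *		.socket_id = rte_socket_id(),
+ *		.clk_src = RTE_EVENT_TIMER_ADAPTER_CPU_CLK,
+ *		.timer_tick_ns = 100000000,		// 100 ms resolution
+ *		.max_tmo_ns = 180000000000ULL,		// 3 minute maximum
+ *		.nb_timers = 1000,
+ *		.flags = RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES,
+ *	};
+ *	struct rte_event_timer_adapter *adapter =
+ *		rte_event_timer_adapter_create(&conf);
+ *	rte_event_timer_adapter_start(adapter);
+ *
+ *	struct rte_event_timer *evtim = ...;	// application-owned storage
+ *	evtim->ev.queue_id = queue_id;
+ *	evtim->ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+ *	evtim->ev.op = RTE_EVENT_OP_NEW;
+ *	evtim->ev.event_type = RTE_EVENT_TYPE_TIMER;
+ *	evtim->state = RTE_EVENT_TIMER_NOT_ARMED;
+ *	evtim->timeout_ticks = 30;		// 30 * timer_tick_ns from now
+ *	rte_event_timer_arm_burst(adapter, &evtim, 1);
+ */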
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_spinlock.h>
+#include <rte_memory.h>
+
+#include "rte_eventdev.h"
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this enum may change without prior notice
+ *
+ * Timer adapter clock source
+ */
+enum rte_event_timer_adapter_clk_src {
+ RTE_EVENT_TIMER_ADAPTER_CPU_CLK,
+ /**< Use CPU clock as the clock source. */
+ RTE_EVENT_TIMER_ADAPTER_EXT_CLK0,
+ /**< Platform dependent external clock source 0. */
+ RTE_EVENT_TIMER_ADAPTER_EXT_CLK1,
+ /**< Platform dependent external clock source 1. */
+ RTE_EVENT_TIMER_ADAPTER_EXT_CLK2,
+ /**< Platform dependent external clock source 2. */
+ RTE_EVENT_TIMER_ADAPTER_EXT_CLK3,
+ /**< Platform dependent external clock source 3. */
+};
+
+#define RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES (1ULL << 0)
+/**< The event timer adapter implementation may have constraints on the
+ * resolution (timer_tick_ns) and maximum timer expiry timeout (max_tmo_ns)
+ * based on the given timer adapter or system. If this flag is set, the
+ * implementation adjusts the resolution and maximum timeout to the best
+ * possible configuration. On successful timer adapter creation, the
+ * application can get the configured resolution and max timeout with
+ * ``rte_event_timer_adapter_get_info()``.
+ *
+ * @see struct rte_event_timer_adapter_info::min_resolution_ns
+ * @see struct rte_event_timer_adapter_info::max_tmo_ns
+ */
+#define RTE_EVENT_TIMER_ADAPTER_F_SP_PUT (1ULL << 1)
+/**< ``rte_event_timer_arm_burst()`` API to be used in single producer mode.
+ *
+ * @see struct rte_event_timer_adapter_conf::flags
+ */
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Timer adapter configuration structure
+ */
+struct rte_event_timer_adapter_conf {
+ uint8_t event_dev_id;
+ /**< Event device identifier */
+ uint16_t timer_adapter_id;
+ /**< Event timer adapter identifier */
+ uint32_t socket_id;
+ /**< Identifier of socket from which to allocate memory for adapter */
+ enum rte_event_timer_adapter_clk_src clk_src;
+ /**< Clock source for timer adapter */
+ uint64_t timer_tick_ns;
+ /**< Timer adapter resolution in ns */
+ uint64_t max_tmo_ns;
+ /**< Maximum timer timeout (expiry) in ns */
+ uint64_t nb_timers;
+ /**< Total number of timers per adapter */
+ uint64_t flags;
+ /**< Timer adapter config flags (RTE_EVENT_TIMER_ADAPTER_F_*) */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Event timer adapter stats structure
+ */
+struct rte_event_timer_adapter_stats {
+ uint64_t evtim_exp_count;
+ /**< Number of event timers that have expired. */
+ uint64_t ev_enq_count;
+ /**< Eventdev enqueue count */
+ uint64_t ev_inv_count;
+ /**< Invalid expiry event count */
+ uint64_t evtim_retry_count;
+ /**< Event timer retry count */
+ uint64_t adapter_tick_count;
+ /**< Tick count for the adapter, at its resolution */
+};
+
+struct rte_event_timer_adapter;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Callback function type for producer port creation.
+ */
+typedef int (*rte_event_timer_adapter_port_conf_cb_t)(uint16_t id,
+ uint8_t event_dev_id,
+ uint8_t *event_port_id,
+ void *conf_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create an event timer adapter.
+ *
+ * This function must be invoked first before any other function in the API.
+ *
+ * @param conf
+ * The event timer adapter configuration structure.
+ *
+ * @return
+ * A pointer to the new allocated event timer adapter on success.
+ * NULL on error with rte_errno set appropriately.
+ * Possible rte_errno values include:
+ * - ERANGE: timer_tick_ns is not in supported range.
+ * - ENOMEM: unable to allocate sufficient memory for adapter instances
+ * - EINVAL: invalid event device identifier specified in config
+ * - ENOSPC: maximum number of adapters already created
+ * - EIO: event device reconfiguration and restart error. The adapter
+ * reconfigures the event device with an additional port by default if it is
+ * required to use a service to manage timers. If the device had been started
+ * before this call, this error code indicates an error in restart following
+ * an error in reconfiguration, i.e., a combination of the two error codes.
+ */
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_create(const struct rte_event_timer_adapter_conf *conf);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a timer adapter with the supplied callback.
+ *
+ * This function can be used to have a more granular control over the timer
+ * adapter creation. If a built-in port is absent, then the function uses the
+ * callback provided to create and get the port id to be used as a producer
+ * port.
+ *
+ * @param conf
+ * The timer adapter configuration structure
+ * @param conf_cb
+ * The port config callback function.
+ * @param conf_arg
+ * Opaque pointer to the argument for the callback function
+ *
+ * @return
+ * A pointer to the new allocated event timer adapter on success.
+ * NULL on error with rte_errno set appropriately.
+ * Possible rte_errno values include:
+ * - ERANGE: timer_tick_ns is not in supported range.
+ * - ENOMEM: unable to allocate sufficient memory for adapter instances
+ * - EINVAL: invalid event device identifier specified in config
+ * - ENOSPC: maximum number of adapters already created
+ */
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_create_ext(
+ const struct rte_event_timer_adapter_conf *conf,
+ rte_event_timer_adapter_port_conf_cb_t conf_cb,
+ void *conf_arg);
+
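+/*
+ * Sketch of a user-supplied port configuration callback for
+ * rte_event_timer_adapter_create_ext(). Here the application has already set
+ * aside an event port (my_port, a hypothetical variable) and simply hands it
+ * back instead of letting the adapter reconfigure the event device:
+ *
+ *	static int
+ *	my_port_conf_cb(uint16_t id, uint8_t event_dev_id,
+ *			uint8_t *event_port_id, void *conf_arg)
+ *	{
+ *		RTE_SET_USED(id);
+ *		RTE_SET_USED(event_dev_id);
+ *		RTE_SET_USED(conf_arg);
+ *		*event_port_id = my_port;
+ *		return 0;
+ *	}
+ *
+ *	adapter = rte_event_timer_adapter_create_ext(&conf, my_port_conf_cb,
+ *						     NULL);
+ */
+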
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Timer adapter info structure.
+ */
+struct rte_event_timer_adapter_info {
+ uint64_t min_resolution_ns;
+ /**< Minimum timer adapter resolution in ns */
+ uint64_t max_tmo_ns;
+ /**< Maximum timer timeout (expiry) in ns */
+ struct rte_event_timer_adapter_conf conf;
+ /**< Configured timer adapter attributes */
+ uint32_t caps;
+ /**< Event timer adapter capabilities */
+ int16_t event_dev_port_id;
+ /**< Event device port ID, if applicable */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the contextual information of an event timer adapter.
+ *
+ * @param adapter
+ * A pointer to the event timer adapter structure.
+ *
+ * @param[out] adapter_info
+ * A pointer to a structure of type *rte_event_timer_adapter_info* to be
+ * filled with the contextual information of the adapter.
+ *
+ * @return
+ * - 0: Success, driver updates the contextual information of the
+ * timer adapter
+ * - <0: Error code returned by the driver info get function.
+ * - -EINVAL: adapter identifier invalid
+ *
+ * @see RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES,
+ * struct rte_event_timer_adapter_info
+ *
+ */
+int __rte_experimental
+rte_event_timer_adapter_get_info(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_info *adapter_info);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start a timer adapter.
+ *
+ * The adapter start step is the last one in the setup sequence; it sets the
+ * timer adapter to start accepting timers and scheduling expiry events to
+ * event queues.
+ *
+ * On success, all basic functions exported by the API (timer arm,
+ * timer cancel and so on) can be invoked.
+ *
+ * @param adapter
+ * A pointer to the event timer adapter structure.
+ *
+ * @return
+ * - 0: Success, adapter started.
+ * - <0: Error code returned by the driver start function.
+ * - -EINVAL if adapter identifier invalid
+ * - -ENOENT if software adapter but no service core mapped
+ * - -ENOTSUP if software adapter and more than one service core mapped
+ */
+int __rte_experimental
+rte_event_timer_adapter_start(
+ const struct rte_event_timer_adapter *adapter);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop an event timer adapter.
+ *
+ * The adapter can be restarted with a call to
+ * ``rte_event_timer_adapter_start()``.
+ *
+ * @param adapter
+ * A pointer to the event timer adapter structure.
+ *
+ * @return
+ * - 0: Success, adapter stopped.
+ * - <0: Error code returned by the driver stop function.
+ * - -EINVAL if adapter identifier invalid
+ */
+int __rte_experimental
+rte_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup an event timer adapter using its identifier.
+ *
+ * If an event timer adapter was created in another process with the same
+ * identifier, this function will locate its state and set up access to it
+ * so that it can be used in this process.
+ *
+ * @param adapter_id
+ * The event timer adapter identifier.
+ *
+ * @return
+ * A pointer to the event timer adapter matching the identifier on success.
+ * NULL on error with rte_errno set appropriately.
+ * Possible rte_errno values include:
+ * - ENOENT - requested entry not available to return.
+ */
+struct rte_event_timer_adapter * __rte_experimental
+rte_event_timer_adapter_lookup(uint16_t adapter_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free an event timer adapter.
+ *
+ * Destroy an event timer adapter, freeing all resources.
+ *
+ * Before invoking this function, the application must wait for all the
+ * armed timers to expire or cancel the outstanding armed timers.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ *
+ * @return
+ * - 0: Successfully freed the event timer adapter resources.
+ * - <0: Failed to free the event timer adapter resources.
+ * - -EAGAIN: adapter is busy; timers outstanding
+ * - -EBUSY: stop hasn't been called for this adapter yet
+ * - -EINVAL: adapter id invalid, or adapter invalid
+ */
+int __rte_experimental
+rte_event_timer_adapter_free(struct rte_event_timer_adapter *adapter);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the service ID of the event timer adapter. If the adapter doesn't
+ * use an rte_service function, this function returns -ESRCH.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter.
+ *
+ * @param [out] service_id
+ * A pointer to a uint32_t, to be filled in with the service id.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure
+ * - -ESRCH: the adapter does not require a service to operate
+ */
+int __rte_experimental
+rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter,
+ uint32_t *service_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve statistics for an event timer adapter instance.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ * @param[out] stats
+ * A pointer to a structure to fill with statistics.
+ *
+ * @return
+ * - 0: Successfully retrieved.
+ * - <0: Failure; error code returned.
+ */
+int __rte_experimental
+rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_stats *stats);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset statistics for an event timer adapter instance.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ *
+ * @return
+ * - 0: Successfully reset;
+ * - <0: Failure; error code returned.
+ */
+int __rte_experimental rte_event_timer_adapter_stats_reset(
+ struct rte_event_timer_adapter *adapter);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * Event timer state.
+ */
+enum rte_event_timer_state {
+ RTE_EVENT_TIMER_NOT_ARMED = 0,
+ /**< Event timer not armed. */
+ RTE_EVENT_TIMER_ARMED = 1,
+ /**< Event timer successfully armed. */
+ RTE_EVENT_TIMER_CANCELED = 2,
+ /**< Event timer successfully canceled. */
+ RTE_EVENT_TIMER_ERROR = -1,
+ /**< Generic event timer error. */
+ RTE_EVENT_TIMER_ERROR_TOOEARLY = -2,
+ /**< Event timer timeout tick value is too small for the adapter to
+ * handle, given its configured resolution.
+ */
+ RTE_EVENT_TIMER_ERROR_TOOLATE = -3,
+ /**< Event timer timeout tick is greater than the maximum timeout.*/
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * The generic *rte_event_timer* structure to hold the event timer attributes
+ * for arm and cancel operations.
+ */
+RTE_STD_C11
+struct rte_event_timer {
+ struct rte_event ev;
+ /**<
+ * Expiry event attributes. On successful event timer timeout,
+ * the following attributes will be used to inject the expiry event to
+ * the eventdev:
+ * - event_queue_id: Targeted event queue id for expiry events.
+ * - event_priority: Event priority of the event expiry event in the
+ * event queue relative to other events.
+ * - sched_type: Scheduling type of the expiry event.
+ * - flow_id: Flow id of the expiry event.
+ * - op: RTE_EVENT_OP_NEW
+ * - event_type: RTE_EVENT_TYPE_TIMER
+ */
+ volatile enum rte_event_timer_state state;
+ /**< State of the event timer. */
+ uint64_t timeout_ticks;
+ /**< Expiry timeout expressed as a number of *timer_tick_ns* ticks from
+ * now.
+ * @see struct rte_event_timer_adapter_info::conf::timer_tick_ns
+ */
+ uint64_t impl_opaque[2];
+ /**< Implementation-specific opaque data.
+ * An event timer adapter implementation uses this field to hold
+ * implementation-specific values shared between the arm and cancel
+ * operations. The application should not modify this field.
+ */
+ uint8_t user_meta[0];
+ /**< Memory to store user specific metadata.
+ * The event timer adapter implementation should not modify this area.
+ */
+} __rte_cache_aligned;
+
+typedef uint16_t (*rte_event_timer_arm_burst_t)(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **tims,
+ uint16_t nb_tims);
+/**< @internal Enable event timers to enqueue timer events upon expiry */
+typedef uint16_t (*rte_event_timer_arm_tmo_tick_burst_t)(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **tims,
+ uint64_t timeout_tick,
+ uint16_t nb_tims);
+/**< @internal Enable event timers with common expiration time */
+typedef uint16_t (*rte_event_timer_cancel_burst_t)(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **tims,
+ uint16_t nb_tims);
+/**< @internal Prevent event timers from enqueuing timer events */
+
+/**
+ * @internal Data structure associated with each event timer adapter.
+ */
+struct rte_event_timer_adapter {
+ rte_event_timer_arm_burst_t arm_burst;
+ /**< Pointer to driver arm_burst function. */
+ rte_event_timer_arm_tmo_tick_burst_t arm_tmo_tick_burst;
+ /**< Pointer to driver arm_tmo_tick_burst function. */
+ rte_event_timer_cancel_burst_t cancel_burst;
+ /**< Pointer to driver cancel function. */
+ struct rte_event_timer_adapter_data *data;
+ /**< Pointer to shared adapter data */
+ const struct rte_event_timer_adapter_ops *ops;
+ /**< Functions exported by adapter driver */
+
+ RTE_STD_C11
+ uint8_t allocated : 1;
+ /**< Flag to indicate that this adapter has been allocated */
+} __rte_cache_aligned;
+
+#define ADAPTER_VALID_OR_ERR_RET(adapter, retval) do { \
+ if (adapter == NULL || !adapter->allocated) \
+ return retval; \
+} while (0)
+
+#define FUNC_PTR_OR_ERR_RET(func, errval) do { \
+ if ((func) == NULL) \
+ return errval; \
+} while (0)
+
+#define FUNC_PTR_OR_NULL_RET_WITH_ERRNO(func, errval) do { \
+ if ((func) == NULL) { \
+ rte_errno = errval; \
+ return NULL; \
+ } \
+} while (0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Arm a burst of event timers with separate expiration timeout tick for each
+ * event timer.
+ *
+ * Before calling this function, the application allocates
+ * ``struct rte_event_timer`` objects from mempool or huge page backed
+ * application buffers of the desired size. On successful allocation, the
+ * application updates the ``struct rte_event_timer`` attributes such as the
+ * expiry event attributes and the timeout ticks from now.
+ * This function submits the event timer arm requests to the event timer
+ * adapter, and on expiry, the events will be injected into the designated
+ * event queue.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ * @param evtims
+ * Pointer to an array of objects of type *rte_event_timer* structure.
+ * @param nb_evtims
+ * Number of event timers in the supplied array.
+ *
+ * @return
+ * The number of successfully armed event timers. The return value can be less
+ * than the value of the *nb_evtims* parameter. If the return value is less
+ * than *nb_evtims*, the remaining event timers at the end of *evtims*
+ * are not consumed; the caller has to take care of them, and rte_errno
+ * is set accordingly. Possible errno values include:
+ * - EINVAL Invalid timer adapter, expiry event queue ID is invalid, or an
+ * expiry event's sched type doesn't match the capabilities of the
+ * destination event queue.
+ * - EAGAIN Specified timer adapter is not running
+ * - EALREADY A timer was encountered that was already armed
+ */
+static inline uint16_t __rte_experimental
+rte_event_timer_arm_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint16_t nb_evtims)
+{
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->arm_burst, -EINVAL);
+#endif
+ return adapter->arm_burst(adapter, evtims, nb_evtims);
+}
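
As a rough illustration of the arm flow described above (a sketch only, not part of this patch; arm_one_timer(), the evtim_pool mempool and timer_queue_id are hypothetical names), an application might fill in and arm a single timer like this:

static void
arm_one_timer(const struct rte_event_timer_adapter *adapter,
		struct rte_mempool *evtim_pool, uint8_t timer_queue_id)
{
	struct rte_event_timer *evtim;

	if (rte_mempool_get(evtim_pool, (void **)&evtim) < 0)
		return;

	evtim->ev.op = RTE_EVENT_OP_NEW;          /* expiry events are injected as NEW */
	evtim->ev.queue_id = timer_queue_id;      /* target event queue for the expiry event */
	evtim->ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	evtim->ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	evtim->ev.event_type = RTE_EVENT_TYPE_TIMER;
	evtim->state = RTE_EVENT_TIMER_NOT_ARMED;
	evtim->timeout_ticks = 30;                /* 30 * timer_tick_ns from now */

	if (rte_event_timer_arm_burst(adapter, &evtim, 1) != 1)
		/* arm failed: rte_errno and evtim->state describe the reason */
		rte_mempool_put(evtim_pool, evtim);
}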
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Arm a burst of event timers with same expiration timeout tick.
+ *
+ * Provides the same functionality as ``rte_event_timer_arm_burst()``, except
+ * that the application can use this API when all the event timers have the
+ * same expiration timeout tick. This specialized function gives the adapter
+ * implementation an additional hint it can use to optimize the operation, if possible.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ * @param evtims
+ * Points to an array of objects of type *rte_event_timer* structure.
+ * @param timeout_ticks
+ * The number of ticks in which the timers should expire.
+ * @param nb_evtims
+ * Number of event timers in the supplied array.
+ *
+ * @return
+ * The number of successfully armed event timers. The return value can be less
+ * than the value of the *nb_evtims* parameter. If the return value is less
+ * than *nb_evtims*, the remaining event timers at the end of *evtims*
+ * are not consumed; the caller has to take care of them, and rte_errno
+ * is set accordingly. Possible errno values include:
+ * - EINVAL Invalid timer adapter, expiry event queue ID is invalid, or an
+ * expiry event's sched type doesn't match the capabilities of the
+ * destination event queue.
+ * - EAGAIN Specified event timer adapter is not running
+ * - EALREADY A timer was encountered that was already armed
+ */
+static inline uint16_t __rte_experimental
+rte_event_timer_arm_tmo_tick_burst(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ const uint64_t timeout_ticks,
+ const uint16_t nb_evtims)
+{
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->arm_tmo_tick_burst, -EINVAL);
+#endif
+ return adapter->arm_tmo_tick_burst(adapter, evtims, timeout_ticks,
+ nb_evtims);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Cancel a burst of event timers from being scheduled to the event device.
+ *
+ * @param adapter
+ * A pointer to an event timer adapter structure.
+ * @param evtims
+ * Points to an array of objects of type *rte_event_timer* structure
+ * @param nb_evtims
+ * Number of event timer instances in the supplied array.
+ *
+ * @return
+ * The number of successfully canceled event timers. The return value can be
+ * less than the value of the *nb_evtims* parameter. If the return value is
+ * less than *nb_evtims*, the remaining event timers at the end of *evtims*
+ * are not consumed; the caller has to take care of them, and rte_errno
+ * is set accordingly. Possible errno values include:
+ * - EINVAL Invalid timer adapter identifier
+ * - EAGAIN Specified timer adapter is not running
+ * - EALREADY A timer was encountered that was already canceled
+ */
+static inline uint16_t __rte_experimental
+rte_event_timer_cancel_burst(const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer **evtims,
+ uint16_t nb_evtims)
+{
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
+ FUNC_PTR_OR_ERR_RET(adapter->cancel_burst, -EINVAL);
+#endif
+ return adapter->cancel_burst(adapter, evtims, nb_evtims);
+}
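
Cancellation is symmetric; a minimal fragment, assuming evtim was armed earlier with rte_event_timer_arm_burst():

uint16_t n = rte_event_timer_cancel_burst(adapter, &evtim, 1);
if (n != 1)
	/* e.g. rte_errno == EALREADY: the timer already expired or was canceled */
	printf("cancel failed: %s\n", rte_strerror(rte_errno));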
+
+#endif /* __RTE_EVENT_TIMER_ADAPTER_H__ */
diff --git a/lib/librte_eventdev/rte_event_timer_adapter_pmd.h b/lib/librte_eventdev/rte_event_timer_adapter_pmd.h
new file mode 100644
index 00000000..cf3509dc
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_timer_adapter_pmd.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef __RTE_EVENT_TIMER_ADAPTER_PMD_H__
+#define __RTE_EVENT_TIMER_ADAPTER_PMD_H__
+
+/**
+ * @file
+ * RTE Event Timer Adapter API (PMD Side)
+ *
+ * @note
+ * This file provides implementation helpers for internal use by PMDs. They
+ * are not intended to be exposed to applications and are not subject to ABI
+ * versioning.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rte_event_timer_adapter.h"
+
+/*
+ * Definitions of functions exported by an event timer adapter implementation
+ * through *rte_event_timer_adapter_ops* structure supplied in the
+ * *rte_event_timer_adapter* structure associated with an event timer adapter.
+ */
+
+typedef int (*rte_event_timer_adapter_init_t)(
+ struct rte_event_timer_adapter *adapter);
+/**< @internal Event timer adapter implementation setup */
+typedef int (*rte_event_timer_adapter_uninit_t)(
+ struct rte_event_timer_adapter *adapter);
+/**< @internal Event timer adapter implementation teardown */
+typedef int (*rte_event_timer_adapter_start_t)(
+ const struct rte_event_timer_adapter *adapter);
+/**< @internal Start running event timer adapter */
+typedef int (*rte_event_timer_adapter_stop_t)(
+ const struct rte_event_timer_adapter *adapter);
+/**< @internal Stop running event timer adapter */
+typedef void (*rte_event_timer_adapter_get_info_t)(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_info *adapter_info);
+/**< @internal Get contextual information for event timer adapter */
+typedef int (*rte_event_timer_adapter_stats_get_t)(
+ const struct rte_event_timer_adapter *adapter,
+ struct rte_event_timer_adapter_stats *stats);
+/**< @internal Get statistics for event timer adapter */
+typedef int (*rte_event_timer_adapter_stats_reset_t)(
+ const struct rte_event_timer_adapter *adapter);
+/**< @internal Reset statistics for event timer adapter */
+
+/**
+ * @internal Structure containing the functions exported by an event timer
+ * adapter implementation.
+ */
+struct rte_event_timer_adapter_ops {
+ rte_event_timer_adapter_init_t init; /**< Set up adapter */
+ rte_event_timer_adapter_uninit_t uninit;/**< Tear down adapter */
+ rte_event_timer_adapter_start_t start; /**< Start adapter */
+ rte_event_timer_adapter_stop_t stop; /**< Stop adapter */
+ rte_event_timer_adapter_get_info_t get_info;
+ /**< Get info from driver */
+ rte_event_timer_adapter_stats_get_t stats_get;
+ /**< Get adapter statistics */
+ rte_event_timer_adapter_stats_reset_t stats_reset;
+ /**< Reset adapter statistics */
+ rte_event_timer_arm_burst_t arm_burst;
+ /**< Arm one or more event timers */
+ rte_event_timer_arm_tmo_tick_burst_t arm_tmo_tick_burst;
+ /**< Arm event timers with same expiration time */
+ rte_event_timer_cancel_burst_t cancel_burst;
+ /**< Cancel one or more event timers */
+};
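
A driver exposes these hooks through a static ops table. The sketch below uses purely hypothetical my_pmd_* names and is not taken from any existing PMD:

static const struct rte_event_timer_adapter_ops my_pmd_timer_ops = {
	.init               = my_pmd_timer_init,
	.uninit             = my_pmd_timer_uninit,
	.start              = my_pmd_timer_start,
	.stop               = my_pmd_timer_stop,
	.get_info           = my_pmd_timer_get_info,
	.stats_get          = my_pmd_timer_stats_get,
	.stats_reset        = my_pmd_timer_stats_reset,
	.arm_burst          = my_pmd_timer_arm_burst,
	.arm_tmo_tick_burst = my_pmd_timer_arm_tmo_tick_burst,
	.cancel_burst       = my_pmd_timer_cancel_burst,
};

The eventdev's timer_adapter_caps_get callback then hands this table back to the adapter layer through its *ops* output parameter.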
+
+/**
+ * @internal Adapter data; structure to be placed in shared memory to be
+ * accessible by various processes in a multi-process configuration.
+ */
+struct rte_event_timer_adapter_data {
+ uint8_t id;
+ /**< Event timer adapter ID */
+ uint8_t event_dev_id;
+ /**< Event device ID */
+ uint32_t socket_id;
+ /**< Socket ID where memory is allocated */
+ uint8_t event_port_id;
+ /**< Optional: event port ID used when the inbuilt port is absent */
+ const struct rte_memzone *mz;
+ /**< Event timer adapter memzone pointer */
+ struct rte_event_timer_adapter_conf conf;
+ /**< Configuration used to configure the adapter. */
+ uint32_t caps;
+ /**< Adapter capabilities */
+ void *adapter_priv;
+ /**< Timer adapter private data*/
+ uint8_t service_inited;
+ /**< Service initialization state */
+ uint32_t service_id;
+ /**< Service ID*/
+
+ RTE_STD_C11
+ uint8_t started : 1;
+ /**< Flag to indicate adapter started. */
+} __rte_cache_aligned;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_EVENT_TIMER_ADAPTER_PMD_H__ */
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index 851a1190..801810ed 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -29,6 +29,8 @@
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
@@ -55,16 +57,21 @@ int
rte_event_dev_get_dev_id(const char *name)
{
int i;
+ uint8_t cmp;
if (!name)
return -EINVAL;
- for (i = 0; i < rte_eventdev_globals->nb_devs; i++)
- if ((strcmp(rte_event_devices[i].data->name, name)
- == 0) &&
- (rte_event_devices[i].attached ==
- RTE_EVENTDEV_ATTACHED))
+ for (i = 0; i < rte_eventdev_globals->nb_devs; i++) {
+ cmp = (strncmp(rte_event_devices[i].data->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) ||
+ (rte_event_devices[i].dev ? (strncmp(
+ rte_event_devices[i].dev->driver->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) : 0);
+ if (cmp && (rte_event_devices[i].attached ==
+ RTE_EVENTDEV_ATTACHED))
return i;
+ }
return -ENODEV;
}
@@ -123,6 +130,51 @@ rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
: 0;
}
+int __rte_experimental
+rte_event_timer_adapter_caps_get(uint8_t dev_id, uint32_t *caps)
+{
+ struct rte_eventdev *dev;
+ const struct rte_event_timer_adapter_ops *ops;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+
+ dev = &rte_eventdevs[dev_id];
+
+ if (caps == NULL)
+ return -EINVAL;
+ *caps = 0;
+
+ return dev->dev_ops->timer_adapter_caps_get ?
+ (*dev->dev_ops->timer_adapter_caps_get)(dev,
+ 0,
+ caps,
+ &ops)
+ : 0;
+}
+
+int __rte_experimental
+rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id,
+ uint32_t *caps)
+{
+ struct rte_eventdev *dev;
+ struct rte_cryptodev *cdev;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ if (!rte_cryptodev_pmd_is_valid_dev(cdev_id))
+ return -EINVAL;
+
+ dev = &rte_eventdevs[dev_id];
+ cdev = rte_cryptodev_pmd_get_dev(cdev_id);
+
+ if (caps == NULL)
+ return -EINVAL;
+ *caps = 0;
+
+ return dev->dev_ops->crypto_adapter_caps_get ?
+ (*dev->dev_ops->crypto_adapter_caps_get)
+ (dev, cdev, caps) : -ENOTSUP;
+}
+
static inline int
rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues)
{
@@ -1123,6 +1175,23 @@ rte_event_dev_start(uint8_t dev_id)
return 0;
}
+int
+rte_event_dev_stop_flush_callback_register(uint8_t dev_id,
+ eventdev_stop_flush_t callback, void *userdata)
+{
+ struct rte_eventdev *dev;
+
+ RTE_EDEV_LOG_DEBUG("Stop flush register dev_id=%" PRIu8, dev_id);
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_eventdevs[dev_id];
+
+ dev->dev_ops->dev_stop_flush = callback;
+ dev->data->dev_stop_flush_arg = userdata;
+
+ return 0;
+}
+
void
rte_event_dev_stop(uint8_t dev_id)
{
diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h
index b21c2717..b6fd6ee7 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -1,35 +1,8 @@
-/*
- * BSD LICENSE
- *
- * Copyright 2016 Cavium, Inc.
- * Copyright 2016 Intel Corporation.
- * Copyright 2016 NXP.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Cavium, Inc nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc.
+ * Copyright(c) 2016-2018 Intel Corporation.
+ * Copyright 2016 NXP
+ * All rights reserved.
*/
#ifndef _RTE_EVENTDEV_H_
@@ -244,6 +217,7 @@ extern "C" {
#include <rte_errno.h>
struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */
+struct rte_event;
/* Event device capability bitmap flags */
#define RTE_EVENT_DEV_CAP_QUEUE_QOS (1ULL << 0)
@@ -835,15 +809,60 @@ int
rte_event_dev_start(uint8_t dev_id);
/**
- * Stop an event device. The device can be restarted with a call to
- * rte_event_dev_start()
+ * Stop an event device.
+ *
+ * This function causes all queued events to be drained, including those
+ * residing in event ports. While draining events out of the device, this
+ * function calls the user-provided flush callback (if one was registered) once
+ * per event.
+ *
+ * The device can be restarted with a call to rte_event_dev_start(). Threads
+ * that continue to enqueue/dequeue while the device is stopped, or is being
+ * stopped, cause undefined behavior. This includes event adapters,
+ * which must be stopped prior to stopping the eventdev.
*
* @param dev_id
* Event device identifier.
+ *
+ * @see rte_event_dev_stop_flush_callback_register()
*/
void
rte_event_dev_stop(uint8_t dev_id);
+typedef void (*eventdev_stop_flush_t)(uint8_t dev_id, struct rte_event event,
+ void *arg);
+/**< Callback function called during rte_event_dev_stop(), invoked once per
+ * flushed event.
+ */
+
+/**
+ * Registers a callback function to be invoked during rte_event_dev_stop() for
+ * each flushed event. This function can be used to properly dispose of queued
+ * events, for example events containing memory pointers.
+ *
+ * The callback function is only registered for the calling process. The
+ * callback function must be registered in every process that can call
+ * rte_event_dev_stop().
+ *
+ * To unregister a callback, call this function with a NULL callback pointer.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param callback
+ * Callback function invoked once per flushed event.
+ * @param userdata
+ * Argument supplied to callback.
+ *
+ * @return
+ * - 0 on success.
+ * - -EINVAL if *dev_id* is invalid
+ *
+ * @see rte_event_dev_stop()
+ */
+int
+rte_event_dev_stop_flush_callback_register(uint8_t dev_id,
+ eventdev_stop_flush_t callback, void *userdata);
+
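A minimal sketch of such a flush callback, assuming (purely for illustration) that every queued event carries an mbuf; flush_cb and shutdown_eventdev are hypothetical names:

static void
flush_cb(uint8_t dev_id __rte_unused, struct rte_event ev, void *arg __rte_unused)
{
	/* free the mbuf still referenced by the flushed event */
	rte_pktmbuf_free(ev.mbuf);
}

static void
shutdown_eventdev(uint8_t dev_id)
{
	/* register in every process that may call rte_event_dev_stop() */
	rte_event_dev_stop_flush_callback_register(dev_id, flush_cb, NULL);
	rte_event_dev_stop(dev_id);
	rte_event_dev_close(dev_id);
}
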
/**
* Close an event device. The device cannot be restarted!
*
@@ -923,8 +942,8 @@ rte_event_dev_close(uint8_t dev_id);
/**< The event generated from ethdev subsystem */
#define RTE_EVENT_TYPE_CRYPTODEV 0x1
/**< The event generated from cryptodev subsystem */
-#define RTE_EVENT_TYPE_TIMERDEV 0x2
-/**< The event generated from timerdev subsystem */
+#define RTE_EVENT_TYPE_TIMER 0x2
+/**< The event generated from event timer adapter */
#define RTE_EVENT_TYPE_CPU 0x3
/**< The event generated from cpu for pipelining.
* Application may use *sub_event_type* to further classify the event
@@ -1096,7 +1115,77 @@ int
rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
uint32_t *caps);
-struct rte_eventdev_driver;
+#define RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT (1ULL << 0)
+/**< This flag is set when the timer mechanism is in HW. */
+
+/**
+ * Retrieve the event device's timer adapter capabilities.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @param[out] caps
+ * A pointer to memory to be filled with event timer adapter capabilities.
+ *
+ * @return
+ * - 0: Success, driver provided event timer adapter capabilities.
+ * - <0: Error code returned by the driver function.
+ */
+int __rte_experimental
+rte_event_timer_adapter_caps_get(uint8_t dev_id, uint32_t *caps);
+
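For example, a fragment (adapter, dev_id and service_lcore assumed to exist) that uses this capability to decide whether the software adapter needs a service core:

uint32_t caps = 0, service_id;

if (rte_event_timer_adapter_caps_get(dev_id, &caps) == 0 &&
		!(caps & RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) {
	/* no HW timer port: the adapter runs as a service and needs an lcore */
	rte_event_timer_adapter_service_id_get(adapter, &service_id);
	rte_service_map_lcore_set(service_id, service_lcore, 1);
	rte_service_runstate_set(service_id, 1);
}
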
+/* Crypto adapter capability bitmap flag */
+#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW 0x1
+/**< Flag indicates HW is capable of generating events in
+ * RTE_EVENT_OP_NEW enqueue operation. Cryptodev will send
+ * packets to the event device as new events using an internal
+ * event port.
+ */
+
+#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD 0x2
+/**< Flag indicates HW is capable of generating events in
+ * RTE_EVENT_OP_FORWARD enqueue operation. Cryptodev will send
+ * packets to the event device as forwarded event using an
+ * internal event port.
+ */
+
+#define RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND 0x4
+/**< Flag indicates HW is capable of mapping crypto queue pair to
+ * event queue.
+ */
+
+#define RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA 0x8
+/**< Flag indicates HW/SW supports a mechanism to store and retrieve
+ * the private data information along with the crypto session.
+ */
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the event device's crypto adapter capabilities for the
+ * specified cryptodev device
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @param cdev_id
+ * The identifier of the cryptodev device.
+ *
+ * @param[out] caps
+ * A pointer to memory filled with event adapter capabilities.
+ * It is expected to be pre-allocated & initialized by the caller.
+ *
+ * @return
+ * - 0: Success, driver provides event adapter capabilities for the
+ * cryptodev device.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+int __rte_experimental
+rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id,
+ uint32_t *caps);
+
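A rough fragment showing how these bits might be queried; the capability-to-mode mapping shown is a simplification for illustration only:

uint32_t caps = 0;
enum rte_event_crypto_adapter_mode mode;

if (rte_event_crypto_adapter_caps_get(dev_id, cdev_id, &caps) != 0)
	rte_exit(EXIT_FAILURE, "cannot query crypto adapter caps\n");

if (caps & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD)
	mode = RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD;  /* HW enqueues OP_FORWARD events */
else
	mode = RTE_EVENT_CRYPTO_ADAPTER_OP_NEW;      /* fall back to OP_NEW / SW service */
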
struct rte_eventdev_ops;
struct rte_eventdev;
@@ -1152,6 +1241,8 @@ struct rte_eventdev_data {
/* Service initialization state */
uint32_t service_id;
/* Service ID*/
+ void *dev_stop_flush_arg;
+ /**< User-provided argument for event flush function */
RTE_STD_C11
uint8_t dev_started : 1;
@@ -1178,7 +1269,7 @@ struct rte_eventdev {
struct rte_eventdev_data *data;
/**< Pointer to device data */
- const struct rte_eventdev_ops *dev_ops;
+ struct rte_eventdev_ops *dev_ops;
/**< Functions exported by PMD */
struct rte_device *dev;
/**< Device info. supplied by probing */
diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h
index 31343b51..3fbb4d2b 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd.h
@@ -26,6 +26,7 @@ extern "C" {
#include <rte_malloc.h>
#include "rte_eventdev.h"
+#include "rte_event_timer_adapter_pmd.h"
/* Logging Macros */
#define RTE_EDEV_LOG_ERR(...) \
@@ -69,6 +70,9 @@ extern "C" {
((RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) | \
(RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ))
+#define RTE_EVENT_CRYPTO_ADAPTER_SW_CAP \
+ RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA
+
/**< Ethernet Rx adapter cap to return If the packet transfers from
* the ethdev to eventdev use a SW service function
*/
@@ -449,6 +453,37 @@ typedef int (*eventdev_eth_rx_adapter_caps_get_t)
struct rte_event_eth_rx_adapter_queue_conf *queue_conf;
/**
+ * Retrieve the event device's timer adapter capabilities, as well as the ops
+ * structure that an event timer adapter should call through to enter the
+ * driver
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param flags
+ * Flags that can be used to determine how to select an event timer
+ * adapter ops structure
+ *
+ * @param[out] caps
+ * A pointer to memory filled with event timer adapter capabilities.
+ *
+ * @param[out] ops
+ * A pointer to the ops pointer to set with the address of the desired ops
+ * structure
+ *
+ * @return
+ * - 0: Success, driver provides event timer adapter capabilities for the
+ * event device.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_timer_adapter_caps_get_t)(
+ const struct rte_eventdev *dev,
+ uint64_t flags,
+ uint32_t *caps,
+ const struct rte_event_timer_adapter_ops **ops);
+
+/**
* Add ethernet Rx queues to event device. This callback is invoked if
* the caps returned from rte_eventdev_eth_rx_adapter_caps_get(, eth_port_id)
* has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set.
@@ -585,6 +620,175 @@ typedef int (*eventdev_eth_rx_adapter_stats_reset)
*/
typedef int (*eventdev_selftest)(void);
+
+struct rte_cryptodev;
+
+/**
+ * This API may change without prior notice
+ *
+ * Retrieve the event device's crypto adapter capabilities for the
+ * specified cryptodev
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * cryptodev pointer
+ *
+ * @param[out] caps
+ * A pointer to memory filled with event adapter capabilities.
+ * It is expected to be pre-allocated & initialized by the caller.
+ *
+ * @return
+ * - 0: Success, driver provides event adapter capabilities for the
+ * cryptodev.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_crypto_adapter_caps_get_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev,
+ uint32_t *caps);
+
+/**
+ * This API may change without prior notice
+ *
+ * Add crypto queue pair to event device. This callback is invoked if
+ * the caps returned from rte_event_crypto_adapter_caps_get(, cdev_id)
+ * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * cryptodev pointer
+ *
+ * @param queue_pair_id
+ * cryptodev queue pair identifier.
+ *
+ * @param event
+ * Event information required for binding cryptodev queue pair to event queue.
+ * This structure will have a valid value for only those HW PMDs supporting
+ * @see RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND capability.
+ *
+ * @return
+ * - 0: Success, cryptodev queue pair added successfully.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_crypto_adapter_queue_pair_add_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev,
+ int32_t queue_pair_id,
+ const struct rte_event *event);
+
+
+/**
+ * This API may change without prior notice
+ *
+ * Delete crypto queue pair from event device. This callback is invoked if
+ * the caps returned from rte_event_crypto_adapter_caps_get(, cdev_id)
+ * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * cryptodev pointer
+ *
+ * @param queue_pair_id
+ * cryptodev queue pair identifier.
+ *
+ * @return
+ * - 0: Success, cryptodev queue pair deleted successfully.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_crypto_adapter_queue_pair_del_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev,
+ int32_t queue_pair_id);
+
+/**
+ * Start crypto adapter. This callback is invoked if
+ * the caps returned from rte_event_crypto_adapter_caps_get(.., cdev_id)
+ * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set and queue pairs
+ * from cdev_id have been added to the event device.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * Crypto device pointer
+ *
+ * @return
+ * - 0: Success, crypto adapter started successfully.
+ * - <0: Error code returned by the driver function.
+ */
+typedef int (*eventdev_crypto_adapter_start_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev);
+
+/**
+ * Stop crypto adapter. This callback is invoked if
+ * the caps returned from rte_event_crypto_adapter_caps_get(.., cdev_id)
+ * has RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_* set and queue pairs
+ * from cdev_id have been added to the event device.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * Crypto device pointer
+ *
+ * @return
+ * - 0: Success, crypto adapter stopped successfully.
+ * - <0: Error code returned by the driver function.
+ */
+typedef int (*eventdev_crypto_adapter_stop_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev);
+
+struct rte_event_crypto_adapter_stats;
+
+/**
+ * Retrieve crypto adapter statistics.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * Crypto device pointer
+ *
+ * @param[out] stats
+ * Pointer to stats structure
+ *
+ * @return
+ * Return 0 on success.
+ */
+
+typedef int (*eventdev_crypto_adapter_stats_get)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev,
+ struct rte_event_crypto_adapter_stats *stats);
+
+/**
+ * Reset crypto adapter statistics.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param cdev
+ * Crypto device pointer
+ *
+ * @return
+ * Return 0 on success.
+ */
+
+typedef int (*eventdev_crypto_adapter_stats_reset)
+ (const struct rte_eventdev *dev,
+ const struct rte_cryptodev *cdev);
+
/** Event device operations function pointer table */
struct rte_eventdev_ops {
eventdev_info_get_t dev_infos_get; /**< Get device info. */
@@ -640,8 +844,29 @@ struct rte_eventdev_ops {
eventdev_eth_rx_adapter_stats_reset eth_rx_adapter_stats_reset;
/**< Reset ethernet Rx stats */
+ eventdev_timer_adapter_caps_get_t timer_adapter_caps_get;
+ /**< Get timer adapter capabilities */
+
+ eventdev_crypto_adapter_caps_get_t crypto_adapter_caps_get;
+ /**< Get crypto adapter capabilities */
+ eventdev_crypto_adapter_queue_pair_add_t crypto_adapter_queue_pair_add;
+ /**< Add queue pair to crypto adapter */
+ eventdev_crypto_adapter_queue_pair_del_t crypto_adapter_queue_pair_del;
+ /**< Delete queue pair from crypto adapter */
+ eventdev_crypto_adapter_start_t crypto_adapter_start;
+ /**< Start crypto adapter */
+ eventdev_crypto_adapter_stop_t crypto_adapter_stop;
+ /**< Stop crypto adapter */
+ eventdev_crypto_adapter_stats_get crypto_adapter_stats_get;
+ /**< Get crypto stats */
+ eventdev_crypto_adapter_stats_reset crypto_adapter_stats_reset;
+ /**< Reset crypto stats */
+
eventdev_selftest dev_selftest;
/**< Start eventdev Selftest */
+
+ eventdev_stop_flush_t dev_stop_flush;
+ /**< User-provided event flush function */
};
/**
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index 2aef470b..12835e9f 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -66,7 +66,6 @@ DPDK_17.11 {
rte_event_eth_rx_adapter_stats_get;
rte_event_eth_rx_adapter_stats_reset;
rte_event_eth_rx_adapter_stop;
-
} DPDK_17.08;
DPDK_18.02 {
@@ -74,3 +73,41 @@ DPDK_18.02 {
rte_event_dev_selftest;
} DPDK_17.11;
+
+DPDK_18.05 {
+ global:
+
+ rte_event_dev_stop_flush_callback_register;
+} DPDK_18.02;
+
+EXPERIMENTAL {
+ global:
+
+ rte_event_crypto_adapter_caps_get;
+ rte_event_crypto_adapter_create;
+ rte_event_crypto_adapter_create_ext;
+ rte_event_crypto_adapter_event_port_get;
+ rte_event_crypto_adapter_free;
+ rte_event_crypto_adapter_queue_pair_add;
+ rte_event_crypto_adapter_queue_pair_del;
+ rte_event_crypto_adapter_service_id_get;
+ rte_event_crypto_adapter_start;
+ rte_event_crypto_adapter_stats_get;
+ rte_event_crypto_adapter_stats_reset;
+ rte_event_crypto_adapter_stop;
+ rte_event_eth_rx_adapter_cb_register;
+ rte_event_timer_adapter_caps_get;
+ rte_event_timer_adapter_create;
+ rte_event_timer_adapter_create_ext;
+ rte_event_timer_adapter_free;
+ rte_event_timer_adapter_get_info;
+ rte_event_timer_adapter_lookup;
+ rte_event_timer_adapter_service_id_get;
+ rte_event_timer_adapter_start;
+ rte_event_timer_adapter_stats_get;
+ rte_event_timer_adapter_stats_reset;
+ rte_event_timer_adapter_stop;
+ rte_event_timer_arm_burst;
+ rte_event_timer_arm_tmo_tick_burst;
+ rte_event_timer_cancel_burst;
+};
diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c
index 7edb2f15..4c3469da 100644
--- a/lib/librte_flow_classify/rte_flow_classify.c
+++ b/lib/librte_flow_classify/rte_flow_classify.c
@@ -635,9 +635,7 @@ action_apply(struct rte_flow_classifier *cls,
}
if (count) {
ret = 0;
- ntuple_stats =
- (struct rte_flow_classify_ipv4_5tuple_stats *)
- stats->stats;
+ ntuple_stats = stats->stats;
ntuple_stats->counter1 = count;
ntuple_stats->ipv4_5tuple = rule->rules.u.ipv4_5tuple;
}
@@ -675,10 +673,7 @@ rte_flow_classifier_query(struct rte_flow_classifier *cls,
return ret;
}
-RTE_INIT(librte_flow_classify_init_log);
-
-static void
-librte_flow_classify_init_log(void)
+RTE_INIT(librte_flow_classify_init_log)
{
librte_flow_classify_logtype =
rte_log_register("lib.flow_classify");
diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.c b/lib/librte_flow_classify/rte_flow_classify_parse.c
index 10eaf043..f65ceaf7 100644
--- a/lib/librte_flow_classify/rte_flow_classify_parse.c
+++ b/lib/librte_flow_classify/rte_flow_classify_parse.c
@@ -279,7 +279,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
}
- ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask;
+ ipv4_mask = item->mask;
/**
* Only support src & dst addresses, protocol,
* others should be masked.
@@ -301,7 +301,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
filter->src_ip_mask = ipv4_mask->hdr.src_addr;
filter->proto_mask = ipv4_mask->hdr.next_proto_id;
- ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec;
+ ipv4_spec = item->spec;
filter->dst_ip = ipv4_spec->hdr.dst_addr;
filter->src_ip = ipv4_spec->hdr.src_addr;
filter->proto = ipv4_spec->hdr.next_proto_id;
@@ -339,7 +339,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
}
if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
- tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+ tcp_mask = item->mask;
/**
* Only support src & dst ports, tcp flags,
@@ -373,12 +373,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
return -EINVAL;
}
- tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+ tcp_spec = item->spec;
filter->dst_port = tcp_spec->hdr.dst_port;
filter->src_port = tcp_spec->hdr.src_port;
filter->tcp_flags = tcp_spec->hdr.tcp_flags;
} else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
- udp_mask = (const struct rte_flow_item_udp *)item->mask;
+ udp_mask = item->mask;
/**
* Only support src & dst ports,
@@ -397,11 +397,11 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
filter->dst_port_mask = udp_mask->hdr.dst_port;
filter->src_port_mask = udp_mask->hdr.src_port;
- udp_spec = (const struct rte_flow_item_udp *)item->spec;
+ udp_spec = item->spec;
filter->dst_port = udp_spec->hdr.dst_port;
filter->src_port = udp_spec->hdr.src_port;
} else {
- sctp_mask = (const struct rte_flow_item_sctp *)item->mask;
+ sctp_mask = item->mask;
/**
* Only support src & dst ports,
@@ -420,7 +420,7 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
filter->dst_port_mask = sctp_mask->hdr.dst_port;
filter->src_port_mask = sctp_mask->hdr.src_port;
- sctp_spec = (const struct rte_flow_item_sctp *)item->spec;
+ sctp_spec = item->spec;
filter->dst_port = sctp_spec->hdr.dst_port;
filter->src_port = sctp_spec->hdr.src_port;
}
@@ -480,12 +480,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
switch (act->type) {
case RTE_FLOW_ACTION_TYPE_COUNT:
action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_COUNT;
- count = (const struct rte_flow_action_count *)act->conf;
+ count = act->conf;
memcpy(&action.act.counter, count, sizeof(action.act.counter));
break;
case RTE_FLOW_ACTION_TYPE_MARK:
action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_MARK;
- mark_spec = (const struct rte_flow_action_mark *)act->conf;
+ mark_spec = act->conf;
memcpy(&action.act.mark, mark_spec, sizeof(action.act.mark));
break;
default:
@@ -502,12 +502,12 @@ classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
switch (act->type) {
case RTE_FLOW_ACTION_TYPE_COUNT:
action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_COUNT;
- count = (const struct rte_flow_action_count *)act->conf;
+ count = act->conf;
memcpy(&action.act.counter, count, sizeof(action.act.counter));
break;
case RTE_FLOW_ACTION_TYPE_MARK:
action.action_mask |= 1LLU << RTE_FLOW_ACTION_TYPE_MARK;
- mark_spec = (const struct rte_flow_action_mark *)act->conf;
+ mark_spec = act->conf;
memcpy(&action.act.mark, mark_spec, sizeof(action.act.mark));
break;
case RTE_FLOW_ACTION_TYPE_END:
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 3648ec09..1fac53a8 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -19,6 +19,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 5ca59745..6cd764ff 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -31,6 +31,9 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_GRE))
+#define IS_IPV4_UDP(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4)) == \
+ (PKT_TX_UDP_SEG | PKT_TX_IPV4))
+
/**
* Internal function which updates the UDP header of a packet, following
* segmentation. This is required to update the header's datagram length field.
diff --git a/lib/librte_gso/gso_udp4.c b/lib/librte_gso/gso_udp4.c
new file mode 100644
index 00000000..927dee12
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include "gso_common.h"
+#include "gso_udp4.h"
+
+#define IPV4_HDR_MF_BIT (1U << 13)
+
+static inline void
+update_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
+ uint16_t nb_segs)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t frag_offset = 0, is_mf;
+ uint16_t l2_hdrlen = pkt->l2_len, l3_hdrlen = pkt->l3_len;
+ uint16_t tail_idx = nb_segs - 1, length, i;
+
+ /*
+ * Update IP header fields for output segments. Specifically,
+ * keep the same IP id, update fragment offset and total
+ * length.
+ */
+ for (i = 0; i < nb_segs; i++) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(segs[i], struct ipv4_hdr *,
+ l2_hdrlen);
+ length = segs[i]->pkt_len - l2_hdrlen;
+ ipv4_hdr->total_length = rte_cpu_to_be_16(length);
+
+ is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
+ ipv4_hdr->fragment_offset =
+ rte_cpu_to_be_16(frag_offset | is_mf);
+ frag_offset += ((length - l3_hdrlen) >> 3);
+ }
+}
+
+int
+gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t pyld_unit_size, hdr_offset;
+ uint16_t frag_off;
+ int ret;
+
+ /* Don't process the fragmented packet */
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ pkt->l2_len);
+ frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+ if (unlikely(IS_FRAGMENTED(frag_off))) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ /*
+ * UDP fragmentation is the same as IP fragmentation.
+ * Except for the first one, the other output packets have only l2
+ * and l3 headers.
+ */
+ hdr_offset = pkt->l2_len + pkt->l3_len;
+
+ /* Don't process the packet without data. */
+ if (unlikely(hdr_offset + pkt->l4_len >= pkt->pkt_len)) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ pyld_unit_size = gso_size - hdr_offset;
+
+ /* Segment the payload */
+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
+ if (ret > 1)
+ update_ipv4_udp_headers(pkt, pkts_out, ret);
+
+ return ret;
+}
diff --git a/lib/librte_gso/gso_udp4.h b/lib/librte_gso/gso_udp4.h
new file mode 100644
index 00000000..b2a2908e
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _GSO_UDP4_H_
+#define _GSO_UDP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a UDP/IPv4 packet. This function doesn't check if the input
+ * packet has correct checksums, and doesn't update checksums for output
+ * GSO segments. Furthermore, it doesn't process already-fragmented IP packets.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when the function succeeds. If the memory space in
+ * pkts_out is insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if it runs out of memory in the MBUF pools.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
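
From the application side, this segmenter is reached through rte_gso_segment() once DEV_TX_OFFLOAD_UDP_TSO is requested in the GSO context. A sketch with placeholder pool handles and sizes:

struct rte_gso_ctx gso_ctx = {
	.direct_pool = direct_pool,      /* mbuf pool for the segment headers */
	.indirect_pool = indirect_pool,  /* mbuf pool for indirect (attached) mbufs */
	.flag = 0,
	.gso_types = DEV_TX_OFFLOAD_UDP_TSO,
	.gso_size = 1400,
};
struct rte_mbuf *segs[16];
int nb_segs;

pkt->ol_flags |= PKT_TX_UDP_SEG | PKT_TX_IPV4;
pkt->l2_len = sizeof(struct ether_hdr);
pkt->l3_len = sizeof(struct ipv4_hdr);
pkt->l4_len = sizeof(struct udp_hdr);

nb_segs = rte_gso_segment(pkt, &gso_ctx, segs, RTE_DIM(segs));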
diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build
index 056534fb..ad8dd858 100644
--- a/lib/librte_gso/meson.build
+++ b/lib/librte_gso/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-sources = files('gso_common.c', 'gso_tcp4.c',
+sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
'gso_tunnel_tcp4.c', 'rte_gso.c')
headers = files('rte_gso.h')
deps += ['ethdev']
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index a44e3d43..751b5b62 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -11,6 +11,17 @@
#include "gso_common.h"
#include "gso_tcp4.h"
#include "gso_tunnel_tcp4.h"
+#include "gso_udp4.h"
+
+#define ILLEGAL_UDP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & DEV_TX_OFFLOAD_UDP_TSO) == 0) || \
+ (ctx)->gso_size < RTE_GSO_UDP_SEG_SIZE_MIN)
+
+#define ILLEGAL_TCP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | \
+ DEV_TX_OFFLOAD_VXLAN_TNL_TSO | \
+ DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0) || \
+ (ctx)->gso_size < RTE_GSO_SEG_SIZE_MIN)
int
rte_gso_segment(struct rte_mbuf *pkt,
@@ -27,14 +38,12 @@ rte_gso_segment(struct rte_mbuf *pkt,
if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
nb_pkts_out < 1 ||
- gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN ||
- ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO |
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
- DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0))
+ (ILLEGAL_UDP_GSO_CTX(gso_ctx) &&
+ ILLEGAL_TCP_GSO_CTX(gso_ctx)))
return -EINVAL;
if (gso_ctx->gso_size >= pkt->pkt_len) {
- pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ pkt->ol_flags &= (~(PKT_TX_TCP_SEG | PKT_TX_UDP_SEG));
pkts_out[0] = pkt;
return 1;
}
@@ -59,6 +68,11 @@ rte_gso_segment(struct rte_mbuf *pkt,
ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
direct_pool, indirect_pool,
pkts_out, nb_pkts_out);
+ } else if (IS_IPV4_UDP(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_UDP_TSO)) {
+ pkt->ol_flags &= (~PKT_TX_UDP_SEG);
+ ret = gso_udp4_segment(pkt, gso_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
} else {
/* unsupported packet, skip */
pkts_out[0] = pkt;
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
index f4abd61c..a626a11e 100644
--- a/lib/librte_gso/rte_gso.h
+++ b/lib/librte_gso/rte_gso.h
@@ -17,10 +17,14 @@ extern "C" {
#include <stdint.h>
#include <rte_mbuf.h>
-/* Minimum GSO segment size. */
+/* Minimum GSO segment size for TCP based packets. */
#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1)
+/* Minimum GSO segment size for UDP based packets. */
+#define RTE_GSO_UDP_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
+ sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr) + 1)
+
/* GSO flags for rte_gso_ctx. */
#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
/**< Use fixed IP ids for output GSO segments. Setting
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index e139e1d7..efc06ede 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -6,7 +6,6 @@ headers = files('rte_cmp_arm64.h',
'rte_cmp_x86.h',
'rte_crc_arm64.h',
'rte_cuckoo_hash.h',
- 'rte_cuckoo_hash_x86.h',
'rte_fbk_hash.h',
'rte_hash_crc.h',
'rte_hash.h',
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 9b1387b5..f7b86c8c 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -31,9 +31,6 @@
#include "rte_hash.h"
#include "rte_cuckoo_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cuckoo_hash_x86.h"
-#endif
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -93,8 +90,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
void *buckets = NULL;
char ring_name[RTE_RING_NAMESIZE];
unsigned num_key_slots;
- unsigned hw_trans_mem_support = 0;
unsigned i;
+ unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
+ unsigned int readwrite_concur_support = 0;
+
rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
@@ -107,7 +106,6 @@ rte_hash_create(const struct rte_hash_parameters *params)
/* Check for valid parameters */
if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
- !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) ||
(params->key_len == 0)) {
rte_errno = EINVAL;
RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
@@ -118,21 +116,29 @@ rte_hash_create(const struct rte_hash_parameters *params)
if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
hw_trans_mem_support = 1;
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
+ multi_writer_support = 1;
+
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
+ readwrite_concur_support = 1;
+ multi_writer_support = 1;
+ }
+
/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
- if (hw_trans_mem_support)
+ if (multi_writer_support)
/*
* Increase number of slots by total number of indices
* that can be stored in the lcore caches
* except for the first cache
*/
num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
- LCORE_CACHE_SIZE + 1;
+ (LCORE_CACHE_SIZE - 1) + 1;
else
num_key_slots = params->entries + 1;
snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
/* Create ring (Dummy slot index is not enqueued) */
- r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
+ r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
params->socket_id, 0);
if (r == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -233,7 +239,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->cmp_jump_table_idx = KEY_OTHER_BYTES;
#endif
- if (hw_trans_mem_support) {
+ if (multi_writer_support) {
h->local_free_slots = rte_zmalloc_socket(NULL,
sizeof(struct lcore_cache) * RTE_MAX_LCORE,
RTE_CACHE_LINE_SIZE, params->socket_id);
@@ -261,6 +267,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->key_store = k;
h->free_slots = r;
h->hw_trans_mem_support = hw_trans_mem_support;
+ h->multi_writer_support = multi_writer_support;
+ h->readwrite_concur_support = readwrite_concur_support;
#if defined(RTE_ARCH_X86)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -271,24 +279,20 @@ rte_hash_create(const struct rte_hash_parameters *params)
#endif
h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
- /* Turn on multi-writer only with explicit flat from user and TM
+ /* Turn on multi-writer only with explicit flag from user and TM
* support.
*/
- if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
- if (h->hw_trans_mem_support) {
- h->add_key = ADD_KEY_MULTIWRITER_TM;
- } else {
- h->add_key = ADD_KEY_MULTIWRITER;
- h->multiwriter_lock = rte_malloc(NULL,
- sizeof(rte_spinlock_t),
- LCORE_CACHE_SIZE);
- rte_spinlock_init(h->multiwriter_lock);
- }
- } else
- h->add_key = ADD_KEY_SINGLEWRITER;
+ if (h->multi_writer_support) {
+ h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
+ RTE_CACHE_LINE_SIZE);
+ if (h->readwrite_lock == NULL)
+ goto err_unlock;
+
+ rte_rwlock_init(h->readwrite_lock);
+ }
/* Populate free slots ring. Entry zero is reserved for key misses. */
- for (i = 1; i < params->entries + 1; i++)
+ for (i = 1; i < num_key_slots; i++)
rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
te->data = (void *) h;
@@ -335,11 +339,10 @@ rte_hash_free(struct rte_hash *h)
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
- if (h->hw_trans_mem_support)
+ if (h->multi_writer_support) {
rte_free(h->local_free_slots);
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_free(h->multiwriter_lock);
+ rte_free(h->readwrite_lock);
+ }
rte_ring_free(h->free_slots);
rte_free(h->key_store);
rte_free(h->buckets);
@@ -366,15 +369,78 @@ rte_hash_secondary_hash(const hash_sig_t primary_hash)
return primary_hash ^ ((tag + 1) * alt_bits_xor);
}
+int32_t
+rte_hash_count(const struct rte_hash *h)
+{
+ uint32_t tot_ring_cnt, cached_cnt = 0;
+ uint32_t i, ret;
+
+ if (h == NULL)
+ return -EINVAL;
+
+ if (h->multi_writer_support) {
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ cached_cnt += h->local_free_slots[i].len;
+
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
+ cached_cnt;
+ } else {
+ tot_ring_cnt = h->entries;
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots);
+ }
+ return ret;
+}
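
A sketch of how the new read/write concurrency flag and rte_hash_count() fit together on the application side (table name and sizes are illustrative):

#include <rte_hash.h>
#include <rte_jhash.h>

struct rte_hash_parameters params = {
	.name = "flow_table",
	.entries = 1 << 16,
	.key_len = sizeof(uint32_t),
	.hash_func = rte_jhash,
	.hash_func_init_val = 0,
	.socket_id = rte_socket_id(),
	/* readers and writers may run concurrently; implies multi-writer support */
	.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
};
struct rte_hash *h = rte_hash_create(&params);
uint32_t key = 42;

rte_hash_add_key(h, &key);
printf("entries in use: %d\n", rte_hash_count(h));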
+
+/* Read write locks implemented using rte_rwlock */
+static inline void
+__hash_rw_writer_lock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_lock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_lock(h->readwrite_lock);
+}
+
+
+static inline void
+__hash_rw_reader_lock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_lock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_lock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_writer_unlock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_unlock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_unlock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_reader_unlock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_unlock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_unlock(h->readwrite_lock);
+}
+
void
rte_hash_reset(struct rte_hash *h)
{
void *ptr;
- unsigned i;
+ uint32_t tot_ring_cnt, i;
if (h == NULL)
return;
+ __hash_rw_writer_lock(h);
memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
@@ -383,97 +449,260 @@ rte_hash_reset(struct rte_hash *h)
rte_pause();
/* Repopulate the free slots ring. Entry zero is reserved for key misses */
- for (i = 1; i < h->entries + 1; i++)
+ if (h->multi_writer_support)
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ else
+ tot_ring_cnt = h->entries;
+
+ for (i = 1; i < tot_ring_cnt + 1; i++)
rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
/* Reset local caches per lcore */
for (i = 0; i < RTE_MAX_LCORE; i++)
h->local_free_slots[i].len = 0;
}
+ __hash_rw_writer_unlock(h);
}
-/* Search for an entry that can be pushed to its alternative location */
-static inline int
-make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
- unsigned int *nr_pushes)
+/*
+ * Function called to enqueue an index back into the cache/ring,
+ * as the slot has not been used and it can be reused in the
+ * next addition attempt.
+ */
+static inline void
+enqueue_slot_back(const struct rte_hash *h,
+ struct lcore_cache *cached_free_slots,
+ void *slot_id)
{
- unsigned i, j;
- int ret;
- uint32_t next_bucket_idx;
- struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];
+ if (h->multi_writer_support) {
+ cached_free_slots->objs[cached_free_slots->len] = slot_id;
+ cached_free_slots->len++;
+ } else
+ rte_ring_sp_enqueue(h->free_slots, slot_id);
+}
+
+/* Search a key from bucket and update its data */
+static inline int32_t
+search_and_update(const struct rte_hash *h, void *data, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
+{
+ int i;
+ struct rte_hash_key *k, *keys = h->key_store;
- /*
- * Push existing item (search for bucket with space in
- * alternative locations) to its alternative location
- */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Search for space in alternative locations */
- next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
- next_bkt[i] = &h->buckets[next_bucket_idx];
- for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
- if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
- break;
+ if (bkt->sig_current[i] == sig &&
+ bkt->sig_alt[i] == alt_hash) {
+ k = (struct rte_hash_key *) ((char *)keys +
+ bkt->key_idx[i] * h->key_entry_size);
+ if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+ /* Update data */
+ k->pdata = data;
+ /*
+ * Return index where key is stored,
+ * subtracting the first dummy index
+ */
+ return bkt->key_idx[i] - 1;
+ }
}
-
- if (j != RTE_HASH_BUCKET_ENTRIES)
- break;
}
+ return -1;
+}
- /* Alternative location has spare room (end of recursive function) */
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
- next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[j] = bkt->key_idx[i];
- return i;
+/* Only tries to insert at one bucket (@prim_bkt) without trying to push
+ * buckets around.
+ * Return 1 if a matching existing key is found, 0 on success, and -1 if no
+ * empty entry is available.
+ */
+static inline int32_t
+rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *prim_bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ unsigned int i;
+ struct rte_hash_bucket *cur_bkt = prim_bkt;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+ /* Check if key was inserted after last check but before this
+ * protected region in case of inserting duplicated keys.
+ */
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
}
- /* Pick entry that has not been pushed yet */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
- if (bkt->flag[i] == 0)
+ /* Insert new entry if there is room in the primary
+ * bucket.
+ */
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ /* Check if slot is available */
+ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+ prim_bkt->sig_current[i] = sig;
+ prim_bkt->sig_alt[i] = alt_hash;
+ prim_bkt->key_idx[i] = new_idx;
break;
+ }
+ }
+ __hash_rw_writer_unlock(h);
- /* All entries have been pushed, so entry cannot be added */
- if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
- return -ENOSPC;
+ if (i != RTE_HASH_BUCKET_ENTRIES)
+ return 0;
- /* Set flag to indicate that this entry is going to be pushed */
- bkt->flag[i] = 1;
+ /* no empty entry */
+ return -1;
+}
- /* Need room in alternative bucket to insert the pushed entry */
- ret = make_space_bucket(h, next_bkt[i], nr_pushes);
- /*
- * After recursive function.
- * Clear flags and insert the pushed entry
- * in its alternative location if successful,
- * or return error
+/* Shift buckets along the provided cuckoo_path (@leaf and @leaf_slot) and fill
+ * the path head with the new entry (sig, alt_hash, new_idx).
+ * Return 1 if a matching key is found, -1 if the cuckoo path is invalidated
+ * and the insert fails, and 0 on success.
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *alt_bkt,
+ const struct rte_hash_key *key, void *data,
+ struct queue_node *leaf, uint32_t leaf_slot,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ uint32_t prev_alt_bkt_idx;
+ struct rte_hash_bucket *cur_bkt = bkt;
+ struct queue_node *prev_node, *curr_node = leaf;
+ struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+ uint32_t prev_slot, curr_slot = leaf_slot;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+
+ /* In case empty slot was gone before entering protected region */
+ if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Check if key was inserted after last check but before this
+ * protected region.
*/
- bkt->flag[i] = 0;
- if (ret >= 0) {
- next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
- next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
- return i;
- } else
- return ret;
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ while (likely(curr_node->prev != NULL)) {
+ prev_node = curr_node->prev;
+ prev_bkt = prev_node->bkt;
+ prev_slot = curr_node->prev_slot;
+
+ prev_alt_bkt_idx =
+ prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
+
+ if (unlikely(&h->buckets[prev_alt_bkt_idx]
+ != curr_bkt)) {
+ /* revert it to empty, otherwise duplicated keys */
+ curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Need to swap current/alt sig to allow later
+ * Cuckoo insert to move elements back to its
+ * primary bucket if available
+ */
+ curr_bkt->sig_alt[curr_slot] =
+ prev_bkt->sig_current[prev_slot];
+ curr_bkt->sig_current[curr_slot] =
+ prev_bkt->sig_alt[prev_slot];
+ curr_bkt->key_idx[curr_slot] =
+ prev_bkt->key_idx[prev_slot];
+
+ curr_slot = prev_slot;
+ curr_node = prev_node;
+ curr_bkt = curr_node->bkt;
+ }
+
+ curr_bkt->sig_current[curr_slot] = sig;
+ curr_bkt->sig_alt[curr_slot] = alt_hash;
+ curr_bkt->key_idx[curr_slot] = new_idx;
+
+ __hash_rw_writer_unlock(h);
+
+ return 0;
}
/*
- * Function called to enqueue back an index in the cache/ring,
- * as slot has not being used and it can be used in the
- * next addition attempt.
+ * Make space for a new key, using BFS cuckoo search and multi-writer-safe
+ * cuckoo moves.
*/
-static inline void
-enqueue_slot_back(const struct rte_hash *h,
- struct lcore_cache *cached_free_slots,
- void *slot_id)
+static inline int
+rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash,
+ uint32_t new_idx, int32_t *ret_val)
{
- if (h->hw_trans_mem_support) {
- cached_free_slots->objs[cached_free_slots->len] = slot_id;
- cached_free_slots->len++;
- } else
- rte_ring_sp_enqueue(h->free_slots, slot_id);
+ unsigned int i;
+ struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+ struct queue_node *tail, *head;
+ struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+ tail = queue;
+ head = queue + 1;
+ tail->bkt = bkt;
+ tail->prev = NULL;
+ tail->prev_slot = -1;
+
+ /* Cuckoo bfs Search */
+ while (likely(tail != head && head <
+ queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
+ RTE_HASH_BUCKET_ENTRIES)) {
+ curr_bkt = tail->bkt;
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
+ int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
+ bkt, sec_bkt, key, data,
+ tail, i, sig, alt_hash,
+ new_idx, ret_val);
+ if (likely(ret != -1))
+ return ret;
+ }
+
+ /* Enqueue new node and keep prev node info */
+ alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
+ & h->bucket_bitmask]);
+ head->bkt = alt_bkt;
+ head->prev = tail;
+ head->prev_slot = i;
+ head++;
+ }
+ tail++;
+ }
+
+ return -ENOSPC;
}
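The two functions above implement the lock-protected cuckoo displacement: a bounded breadth-first search finds a vacant slot, then the path is rewound so every displaced entry lands in its alternative bucket and the new entry takes the head of the path. The following standalone, single-threaded sketch shows the same mechanics on a toy table; all toy_* names and the table geometry are invented, and the writer lock plus the path revalidation performed by rte_hash_cuckoo_move_insert_mw() are deliberately omitted.

#include <stdint.h>

#define NB_BUCKETS 8          /* must be a power of two */
#define ENTRIES    2          /* slots per bucket */
#define QUEUE_LEN  64         /* bounded BFS queue */
#define EMPTY      0

struct toy_bucket { uint32_t sig[ENTRIES], alt[ENTRIES], key_idx[ENTRIES]; };
struct toy_node   { struct toy_bucket *bkt; struct toy_node *prev; int prev_slot; };

/* Walk the BFS parent chain backwards, shifting each displaced entry into
 * the slot freed by its child, then place the new entry at the path head. */
static void
toy_move_along_path(struct toy_node *leaf, int leaf_slot,
                    uint32_t sig, uint32_t alt, uint32_t new_idx)
{
    struct toy_node *cur = leaf;
    int slot = leaf_slot;

    while (cur->prev != NULL) {
        struct toy_node *prev = cur->prev;
        int pslot = cur->prev_slot;

        /* the moved entry now sits in its alternative bucket, so swap sigs */
        cur->bkt->sig[slot]     = prev->bkt->alt[pslot];
        cur->bkt->alt[slot]     = prev->bkt->sig[pslot];
        cur->bkt->key_idx[slot] = prev->bkt->key_idx[pslot];
        slot = pslot;
        cur = prev;
    }
    cur->bkt->sig[slot]     = sig;
    cur->bkt->alt[slot]     = alt;
    cur->bkt->key_idx[slot] = new_idx;
}

/* BFS from the primary bucket: each visited slot enqueues its alternative
 * bucket; the first empty slot found ends the search. Returns 0 on success,
 * -1 when the bounded queue is exhausted (the caller would report -ENOSPC). */
static int
toy_make_space(struct toy_bucket *buckets, struct toy_bucket *prim,
               uint32_t sig, uint32_t alt, uint32_t new_idx)
{
    struct toy_node queue[QUEUE_LEN];
    struct toy_node *tail = queue, *head = queue + 1;

    tail->bkt = prim;
    tail->prev = NULL;
    tail->prev_slot = -1;

    while (tail != head && head < queue + QUEUE_LEN - ENTRIES) {
        struct toy_bucket *b = tail->bkt;

        for (int i = 0; i < ENTRIES; i++) {
            if (b->key_idx[i] == EMPTY) {
                toy_move_along_path(tail, i, sig, alt, new_idx);
                return 0;
            }
            /* enqueue the alternative bucket, remembering the parent slot */
            head->bkt = &buckets[b->alt[i] & (NB_BUCKETS - 1)];
            head->prev = tail;
            head->prev_slot = i;
            head++;
        }
        tail++;
    }
    return -1;
}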
static inline int32_t
@@ -482,19 +711,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
{
hash_sig_t alt_hash;
uint32_t prim_bucket_idx, sec_bucket_idx;
- unsigned i;
struct rte_hash_bucket *prim_bkt, *sec_bkt;
- struct rte_hash_key *new_k, *k, *keys = h->key_store;
+ struct rte_hash_key *new_k, *keys = h->key_store;
void *slot_id = NULL;
uint32_t new_idx;
int ret;
unsigned n_slots;
unsigned lcore_id;
struct lcore_cache *cached_free_slots = NULL;
- unsigned int nr_pushes = 0;
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_lock(h->multiwriter_lock);
+ int32_t ret_val;
prim_bucket_idx = sig & h->bucket_bitmask;
prim_bkt = &h->buckets[prim_bucket_idx];
@@ -505,8 +730,24 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
sec_bkt = &h->buckets[sec_bucket_idx];
rte_prefetch0(sec_bkt);
- /* Get a new slot for storing the new key */
- if (h->hw_trans_mem_support) {
+ /* Check if key is already inserted in primary location */
+ __hash_rw_writer_lock(h);
+ ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+
+ /* Check if key is already inserted in secondary location */
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+ __hash_rw_writer_unlock(h);
+
+ /* Did not find a match, so get a new slot for storing the new key */
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Try to get a free slot from the local cache */
@@ -516,8 +757,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
if (n_slots == 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
cached_free_slots->len += n_slots;
@@ -528,122 +768,50 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
slot_id = cached_free_slots->objs[cached_free_slots->len];
} else {
if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
}
new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
- rte_prefetch0(new_k);
new_idx = (uint32_t)((uintptr_t) slot_id);
-
- /* Check if key is already inserted in primary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (prim_bkt->sig_current[i] == sig &&
- prim_bkt->sig_alt[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- prim_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- return prim_bkt->key_idx[i] - 1;
- }
- }
- }
-
- /* Check if key is already inserted in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (sec_bkt->sig_alt[i] == sig &&
- sec_bkt->sig_current[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- sec_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- return sec_bkt->key_idx[i] - 1;
- }
- }
- }
-
/* Copy key */
rte_memcpy(new_k->key, key, h->key_len);
new_k->pdata = data;
-#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
- if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
- ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
- sig, alt_hash, new_idx);
- if (ret >= 0)
- return new_idx - 1;
- /* Primary bucket full, need to make space for new entry */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
- alt_hash, new_idx);
+ /* Find an empty slot and insert */
+ ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- if (ret >= 0)
- return new_idx - 1;
+ /* Primary bucket full, need to make space for new entry */
+ ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- /* Also search secondary bucket to get better occupancy */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
- alt_hash, new_idx);
+ /* Also search secondary bucket to get better occupancy */
+ ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
+ alt_hash, sig, new_idx, &ret_val);
- if (ret >= 0)
- return new_idx - 1;
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
} else {
-#endif
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
-
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-
- /* Primary bucket full, need to make space for new entry
- * After recursive function.
- * Insert the new entry in the position of the pushed entry
- * if successful or return error and
- * store the new slot back in the ring
- */
- ret = make_space_bucket(h, prim_bkt, &nr_pushes);
- if (ret >= 0) {
- prim_bkt->sig_current[ret] = sig;
- prim_bkt->sig_alt[ret] = alt_hash;
- prim_bkt->key_idx[ret] = new_idx;
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-#if defined(RTE_ARCH_X86)
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret;
}
-#endif
- /* Error in addition, store new slot back in the ring and return error */
- enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
-
-failure:
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return ret;
}
int32_t
@@ -688,20 +856,15 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
else
return ret;
}
+
+/* Search one bucket to find the matching key */
static inline int32_t
-__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig, void **data)
+search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
+ void **data, const struct rte_hash_bucket *bkt)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
+ int i;
struct rte_hash_key *k, *keys = h->key_store;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
- /* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
bkt->key_idx[i] != EMPTY_SLOT) {
@@ -718,30 +881,41 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig, void **data)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+ __hash_rw_reader_lock(h);
+
+ /* Check if key is in primary location */
+ ret = search_one_bucket(h, key, sig, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
/* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->sig_alt[i] == sig) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- if (data != NULL)
- *data = k->pdata;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- return bkt->key_idx[i] - 1;
- }
- }
+ ret = search_one_bucket(h, key, alt_hash, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
}
-
+ __hash_rw_reader_unlock(h);
return -ENOENT;
}
@@ -783,7 +957,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
bkt->sig_current[i] = NULL_SIGNATURE;
bkt->sig_alt[i] = NULL_SIGNATURE;
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Cache full, need to free it. */
@@ -804,20 +978,15 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
}
}
+/* Search one bucket and remove the matched key */
static inline int32_t
-__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig)
+search_and_remove(const struct rte_hash *h, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
struct rte_hash_key *k, *keys = h->key_store;
+ unsigned int i;
int32_t ret;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
/* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
@@ -837,32 +1006,42 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int32_t ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+
+ __hash_rw_writer_lock(h);
+ /* look for key in primary bucket */
+ ret = search_and_remove(h, key, bkt, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
- /* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->key_idx[i] != EMPTY_SLOT) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- remove_entry(h, bkt, i);
-
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = bkt->key_idx[i] - 1;
- bkt->key_idx[i] = EMPTY_SLOT;
- return ret;
- }
- }
+ /* look for key in secondary bucket */
+ ret = search_and_remove(h, key, bkt, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
}
+ __hash_rw_writer_unlock(h);
return -ENOENT;
}
@@ -1004,6 +1183,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
rte_prefetch0(secondary_bkt[i]);
}
+ __hash_rw_reader_lock(h);
/* Compare signatures and prefetch key slot of first hit */
for (i = 0; i < num_keys; i++) {
compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
@@ -1086,6 +1266,8 @@ next_key:
continue;
}
+ __hash_rw_reader_unlock(h);
+
if (hit_mask != NULL)
*hit_mask = hits;
}
@@ -1144,7 +1326,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
idx = *next % RTE_HASH_BUCKET_ENTRIES;
}
-
+ __hash_rw_reader_lock(h);
/* Get position of entry in key table */
position = h->buckets[bucket_idx].key_idx[idx];
next_key = (struct rte_hash_key *) ((char *)h->key_store +
@@ -1153,6 +1335,8 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
*key = next_key->key;
*data = next_key->pdata;
+ __hash_rw_reader_unlock(h);
+
/* Increment iterator */
(*next)++;
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 7a54e555..b43f467d 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -88,15 +88,14 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
#endif
-enum add_key_case {
- ADD_KEY_SINGLEWRITER = 0,
- ADD_KEY_MULTIWRITER,
- ADD_KEY_MULTIWRITER_TM,
-};
/** Number of items per bucket. */
#define RTE_HASH_BUCKET_ENTRIES 8
+#if !RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES)
+#error RTE_HASH_BUCKET_ENTRIES must be a power of 2
+#endif
+
#define NULL_SIGNATURE 0
#define EMPTY_SLOT 0
@@ -155,18 +154,20 @@ struct rte_hash {
struct rte_ring *free_slots;
/**< Ring that stores all indexes of the free slots in the key table */
- uint8_t hw_trans_mem_support;
- /**< Hardware transactional memory support */
+
struct lcore_cache *local_free_slots;
/**< Local cache per lcore, storing some indexes of the free slots */
- enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
- rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
/* Fields used in lookup */
uint32_t key_len __rte_cache_aligned;
/**< Length of hash key. */
+ uint8_t hw_trans_mem_support;
+ /**< If hardware transactional memory is used. */
+ uint8_t multi_writer_support;
+ /**< If multi-writer support is enabled. */
+ uint8_t readwrite_concur_support;
+ /**< If read-write concurrency support is enabled */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
@@ -184,6 +185,7 @@ struct rte_hash {
/**< Table with buckets storing all the hash values and key indexes
* to the key table.
*/
+ rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
} __rte_cache_aligned;
struct queue_node {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
deleted file mode 100644
index 2c5b017e..00000000
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016 Intel Corporation
- */
-
-/* rte_cuckoo_hash_x86.h
- * This file holds all x86 specific Cuckoo Hash functions
- */
-
-/* Only tries to insert at one bucket (@prim_bkt) without trying to push
- * buckets around
- */
-static inline unsigned
-rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned i, status;
- unsigned try = 0;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- /* Insert new entry if there is room in the primary
- * bucket.
- */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
- rte_xend();
-
- if (i != RTE_HASH_BUCKET_ENTRIES)
- return 0;
-
- break; /* break off try loop if transaction commits */
- } else {
- /* If we abort we give up this cuckoo path. */
- try++;
- rte_pause();
- }
- }
-
- return -1;
-}
-
-/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
- * the path head with new entry (sig, alt_hash, new_idx)
- */
-static inline int
-rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
- struct queue_node *leaf, uint32_t leaf_slot,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned try = 0;
- unsigned status;
- uint32_t prev_alt_bkt_idx;
-
- struct queue_node *prev_node, *curr_node = leaf;
- struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
- uint32_t prev_slot, curr_slot = leaf_slot;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- while (likely(curr_node->prev != NULL)) {
- prev_node = curr_node->prev;
- prev_bkt = prev_node->bkt;
- prev_slot = curr_node->prev_slot;
-
- prev_alt_bkt_idx
- = prev_bkt->sig_alt[prev_slot]
- & h->bucket_bitmask;
-
- if (unlikely(&h->buckets[prev_alt_bkt_idx]
- != curr_bkt)) {
- rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
- }
-
- /* Need to swap current/alt sig to allow later
- * Cuckoo insert to move elements back to its
- * primary bucket if available
- */
- curr_bkt->sig_alt[curr_slot] =
- prev_bkt->sig_current[prev_slot];
- curr_bkt->sig_current[curr_slot] =
- prev_bkt->sig_alt[prev_slot];
- curr_bkt->key_idx[curr_slot]
- = prev_bkt->key_idx[prev_slot];
-
- curr_slot = prev_slot;
- curr_node = prev_node;
- curr_bkt = curr_node->bkt;
- }
-
- curr_bkt->sig_current[curr_slot] = sig;
- curr_bkt->sig_alt[curr_slot] = alt_hash;
- curr_bkt->key_idx[curr_slot] = new_idx;
-
- rte_xend();
-
- return 0;
- }
-
- /* If we abort we give up this cuckoo path, since most likely it's
- * no longer valid as TSX detected data conflict
- */
- try++;
- rte_pause();
- }
-
- return -1;
-}
-
-/*
- * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
- * Cuckoo
- */
-static inline int
-rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
- struct rte_hash_bucket *bkt,
- hash_sig_t sig, hash_sig_t alt_hash,
- uint32_t new_idx)
-{
- unsigned i;
- struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
- struct queue_node *tail, *head;
- struct rte_hash_bucket *curr_bkt, *alt_bkt;
-
- tail = queue;
- head = queue + 1;
- tail->bkt = bkt;
- tail->prev = NULL;
- tail->prev_slot = -1;
-
- /* Cuckoo bfs Search */
- while (likely(tail != head && head <
- queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
- RTE_HASH_BUCKET_ENTRIES)) {
- curr_bkt = tail->bkt;
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
- if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
- tail, i, sig,
- alt_hash, new_idx) == 0))
- return 0;
- }
-
- /* Enqueue new node and keep prev node info */
- alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
- & h->bucket_bitmask]);
- head->bkt = alt_bkt;
- head->prev = tail;
- head->prev_slot = i;
- head++;
- }
- tail++;
- }
-
- return -ENOSPC;
-}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 3beaca71..9e7d9315 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -34,6 +34,9 @@ extern "C" {
/** Default behavior of insertion, single writer/multi writer */
#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+/** Flag to support reader writer concurrency */
+#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
+
/** Signature of key that is stored internally. */
typedef uint32_t hash_sig_t;
@@ -124,9 +127,22 @@ void
rte_hash_reset(struct rte_hash *h);
/**
+ * Return the number of keys in the hash table
+ * @param h
+ * Hash table to query from
+ * @return
+ * - -EINVAL if parameters are invalid
+ *  - A value indicating how many keys are currently stored in the table.
+ */
+int32_t
+rte_hash_count(const struct rte_hash *h);
+
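The new rte_hash_count() pairs naturally with the concurrency flags introduced above. A minimal usage sketch follows; the table name, sizing and the rte_jhash choice are arbitrary examples, not recommendations.

#include <stdio.h>
#include <stdint.h>
#include <rte_hash.h>
#include <rte_jhash.h>

static struct rte_hash *
make_concurrent_table(void)
{
    struct rte_hash_parameters params = {
        .name = "flow_table",                /* example name */
        .entries = 1024,
        .key_len = sizeof(uint32_t),
        .hash_func = rte_jhash,
        .hash_func_init_val = 0,
        .socket_id = 0,
        .extra_flag = RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD |
                      RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
    };
    struct rte_hash *h = rte_hash_create(&params);

    if (h != NULL) {
        uint32_t key = 42;

        /* writers may now run on several lcores concurrently with readers */
        rte_hash_add_key_data(h, &key, (void *)(uintptr_t)1);
        /* rte_hash_count() reports how many keys are stored right now */
        printf("keys in table: %d\n", (int)rte_hash_count(h));
    }
    return h;
}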
+/**
* Add a key-value pair to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -146,7 +162,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data);
* Add a key-value pair with a pre-computed hash value
* to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -167,7 +185,9 @@ rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Add a key to an existing hash table. This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -185,7 +205,9 @@ rte_hash_add_key(const struct rte_hash *h, const void *key);
/**
* Add a key to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -205,7 +227,9 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -224,7 +248,9 @@ rte_hash_del_key(const struct rte_hash *h, const void *key);
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -244,7 +270,9 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Find a key in the hash table given the position.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to get the key from.
@@ -254,8 +282,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
* Output containing a pointer to the key
* @return
* - 0 if retrieved successfully
- * - EINVAL if the parameters are invalid.
- * - ENOENT if no valid key is found in the given position.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if no valid key is found in the given position.
*/
int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
@@ -263,7 +291,9 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
/**
* Find a key-value pair in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -272,9 +302,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
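The clarified return-value wording above suggests the usual pattern of keeping per-key state in a flat array indexed by the value that the add/lookup calls return. A hedged sketch, with invented array and key names:

#include <errno.h>
#include <stdint.h>
#include <rte_hash.h>

#define MAX_FLOWS 1024                    /* sized to the table's 'entries' */
static struct { uint64_t pkts; } flow_stats[MAX_FLOWS];

static void
count_packet(struct rte_hash *h, uint32_t flow_key)
{
    /* the index is the same one that was returned when the key was added */
    int32_t pos = rte_hash_lookup(h, &flow_key);

    if (pos == -ENOENT)
        pos = rte_hash_add_key(h, &flow_key);
    if (pos >= 0)
        flow_stats[pos].pkts++;
}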
@@ -282,7 +314,9 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
/**
* Find a key-value pair with a pre-computed hash value
* to an existing hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -293,9 +327,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
@@ -303,7 +339,9 @@ rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -321,14 +359,16 @@ rte_hash_lookup(const struct rte_hash *h, const void *key);
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
* @param key
* Key to find.
* @param sig
- * Hash value to remove from the hash table.
+ * Precomputed hash value for 'key'.
* @return
* - -EINVAL if the parameters are invalid.
* - -ENOENT if the key is not found.
@@ -356,7 +396,9 @@ rte_hash_hash(const struct rte_hash *h, const void *key);
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -377,7 +419,9 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting the flag during
+ * table creation.
*
* @param h
* Hash table to look in.
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 479f84b1..cf28031b 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -338,14 +338,13 @@ crc32c_1word(uint32_t data, uint32_t init_val)
static inline uint32_t
crc32c_2words(uint64_t data, uint32_t init_val)
{
+ uint32_t crc, term1, term2;
union {
uint64_t u64;
uint32_t u32[2];
} d;
d.u64 = data;
- uint32_t crc, term1, term2;
-
crc = init_val;
crc ^= d.u32[0];
@@ -399,9 +398,9 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
} d;
d.u64 = data;
- init_val = crc32c_sse42_u32(d.u32[0], init_val);
- init_val = crc32c_sse42_u32(d.u32[1], init_val);
- return init_val;
+ init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val);
+ init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val);
+ return (uint32_t)init_val;
}
#endif
@@ -413,7 +412,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val)
"crc32q %[data], %[init_val];"
: [init_val] "+r" (init_val)
: [data] "rm" (data));
- return init_val;
+ return (uint32_t)init_val;
}
#endif
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 52a2576f..e216ac8e 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -45,3 +45,11 @@ DPDK_16.07 {
rte_hash_get_key_with_position;
} DPDK_2.2;
+
+
+DPDK_18.08 {
+ global:
+
+ rte_hash_count;
+
+} DPDK_16.07;
diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c
index 7397aa69..2560c771 100644
--- a/lib/librte_ip_frag/ip_frag_internal.c
+++ b/lib/librte_ip_frag/ip_frag_internal.c
@@ -152,7 +152,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
fp->frags[IP_LAST_FRAG_IDX].len);
else
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
- "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+ "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
@@ -210,7 +210,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
fp->frags[IP_LAST_FRAG_IDX].len);
else
IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
- "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+ "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
"total_size: %u, frag_size: %u, last_idx: %u\n"
"first fragment: ofs: %u, len: %u\n"
"last fragment: ofs: %u, len: %u\n\n",
@@ -331,7 +331,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
if (p1->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
- "ipv6_frag_pkt line0: %p, index: %u from %u\n"
+ "ipv4_frag_pkt line0: %p, index: %u from %u\n"
"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
@@ -357,7 +357,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
if (p2->key.key_len == IPV4_KEYLEN)
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
- "ipv6_frag_pkt line1: %p, index: %u from %u\n"
+ "ipv4_frag_pkt line1: %p, index: %u from %u\n"
"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c
index 82e831ca..4956b99e 100644
--- a/lib/librte_ip_frag/rte_ipv4_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -59,7 +59,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp)
/* chain with the first fragment. */
rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+ fp->frags[curr_idx].mb = NULL;
m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+ fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
/* update mbuf fields for reassembled packet. */
m->ol_flags |= PKT_TX_IP_CKSUM;
diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
index 3479fabb..db249fe6 100644
--- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -82,7 +82,9 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
/* chain with the first fragment. */
rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+ fp->frags[curr_idx].mb = NULL;
m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+ fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
/* update mbuf fields for reassembled packet. */
m->ol_flags |= PKT_TX_IP_CKSUM;
diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
index c4b21961..a738a033 100644
--- a/lib/librte_kni/meson.build
+++ b/lib/librte_kni/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-if host_machine.system() != 'linux'
+if host_machine.system() != 'linux' or cc.sizeof('void *') == 4
build = false
endif
version = 2
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 28674115..65f6a2b0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -510,7 +510,7 @@ kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
{
int ret = 0;
- if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
+ if (!rte_eth_dev_is_valid_port(port_id)) {
RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id);
return -EINVAL;
}
@@ -530,7 +530,7 @@ kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
static int
kni_config_promiscusity(uint16_t port_id, uint8_t to_on)
{
- if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
+ if (!rte_eth_dev_is_valid_port(port_id)) {
RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id);
return -EINVAL;
}
@@ -715,6 +715,9 @@ rte_kni_get(const char *name)
struct rte_kni_memzone_slot *it;
struct rte_kni *kni;
+ if (name == NULL || name[0] == '\0')
+ return NULL;
+
/* Note: could be improved perf-wise if necessary */
for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
it = &kni_memzone_pool.slots[i];
diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
index 4eaa9334..87593954 100644
--- a/lib/librte_kvargs/Makefile
+++ b/lib/librte_kvargs/Makefile
@@ -1,35 +1,5 @@
-# BSD LICENSE
-#
+# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2014 6WIND S.A.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# - Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# - Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-#
-# - Neither the name of 6WIND S.A. nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-# OF THE POSSIBILITY OF SUCH DAMAGE.
include $(RTE_SDK)/mk/rte.vars.mk
@@ -37,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kvargs.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-LDLIBS += -lrte_eal
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
EXPORT_MAP := rte_kvargs_version.map
diff --git a/lib/librte_kvargs/meson.build b/lib/librte_kvargs/meson.build
index 0c5b9cb2..acd3e543 100644
--- a/lib/librte_kvargs/meson.build
+++ b/lib/librte_kvargs/meson.build
@@ -1,6 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+includes = [global_inc]
+includes += include_directories('../librte_eal/common/include')
+
version = 1
sources = files('rte_kvargs.c')
headers = files('rte_kvargs.h')
+
+deps += 'compat'
diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c
index 9662375e..a28f7694 100644
--- a/lib/librte_kvargs/rte_kvargs.c
+++ b/lib/librte_kvargs/rte_kvargs.c
@@ -6,14 +6,13 @@
#include <string.h>
#include <stdlib.h>
-#include <rte_log.h>
#include <rte_string_fns.h>
#include "rte_kvargs.h"
/*
* Receive a string with a list of arguments following the pattern
- * key=value;key=value;... and insert them into the list.
+ * key=value,key=value,... and insert them into the list.
* strtok() is used so the params string will be copied to be modified.
*/
static int
@@ -28,29 +27,22 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
* to pass to rte_strsplit
*/
kvlist->str = strdup(params);
- if (kvlist->str == NULL) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n");
+ if (kvlist->str == NULL)
return -1;
- }
/* browse each key/value pair and add it in kvlist */
str = kvlist->str;
while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) {
i = kvlist->count;
- if (i >= RTE_KVARGS_MAX) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n");
+ if (i >= RTE_KVARGS_MAX)
return -1;
- }
kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2);
kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2);
- if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) {
- RTE_LOG(ERR, PMD,
- "Cannot parse arguments: wrong key or value\n"
- "params=<%s>\n", params);
+ if (kvlist->pairs[i].key == NULL ||
+ kvlist->pairs[i].value == NULL)
return -1;
- }
kvlist->count++;
str = NULL;
@@ -89,12 +81,8 @@ check_for_valid_keys(struct rte_kvargs *kvlist,
for (i = 0; i < kvlist->count; i++) {
pair = &kvlist->pairs[i];
ret = is_valid_key(valid, pair->key);
- if (!ret) {
- RTE_LOG(ERR, PMD,
- "Error parsing device, invalid key <%s>\n",
- pair->key);
+ if (!ret)
return -1;
- }
}
return 0;
}
@@ -154,7 +142,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist)
}
/*
- * Parse the arguments "key=value;key=value;..." string and return
+ * Parse the arguments "key=value,key=value,..." string and return
* an allocated structure that contains a key/value list. Also
* check if only valid keys were used.
*/
@@ -180,3 +168,38 @@ rte_kvargs_parse(const char *args, const char * const valid_keys[])
return kvlist;
}
+
+__rte_experimental
+struct rte_kvargs *
+rte_kvargs_parse_delim(const char *args, const char * const valid_keys[],
+ const char *valid_ends)
+{
+ struct rte_kvargs *kvlist = NULL;
+ char *copy;
+ size_t len;
+
+ if (valid_ends == NULL)
+ return rte_kvargs_parse(args, valid_keys);
+
+ copy = strdup(args);
+ if (copy == NULL)
+ return NULL;
+
+ len = strcspn(copy, valid_ends);
+ copy[len] = '\0';
+
+ kvlist = rte_kvargs_parse(copy, valid_keys);
+
+ free(copy);
+ return kvlist;
+}
+
+__rte_experimental
+int
+rte_kvargs_strcmp(const char *key __rte_unused,
+ const char *value, void *opaque)
+{
+ const char *str = opaque;
+
+ return -abs(strcmp(str, value));
+}
diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h
index 51b8120b..fc041956 100644
--- a/lib/librte_kvargs/rte_kvargs.h
+++ b/lib/librte_kvargs/rte_kvargs.h
@@ -25,6 +25,8 @@
extern "C" {
#endif
+#include <rte_compat.h>
+
/** Maximum number of key/value associations */
#define RTE_KVARGS_MAX 32
@@ -72,6 +74,36 @@ struct rte_kvargs *rte_kvargs_parse(const char *args,
const char *const valid_keys[]);
/**
+ * Allocate a rte_kvargs and store key/value associations from a string.
+ * This version will consider any byte from valid_ends as a possible
+ * terminating character, and will not parse beyond any of their occurrence.
+ *
+ * The function allocates and fills an rte_kvargs structure from a given
+ * string whose format is key1=value1,key2=value2,...
+ *
+ * The structure can be freed with rte_kvargs_free().
+ *
+ * @param args
+ * The input string containing the key/value associations
+ *
+ * @param valid_keys
+ * A list of valid keys (table of const char *, the last must be NULL).
+ * This argument is ignored if NULL
+ *
+ * @param valid_ends
+ * Acceptable terminating characters.
+ * If NULL, the behavior is the same as ``rte_kvargs_parse``.
+ *
+ * @return
+ * - A pointer to an allocated rte_kvargs structure on success
+ * - NULL on error
+ */
+__rte_experimental
+struct rte_kvargs *rte_kvargs_parse_delim(const char *args,
+ const char *const valid_keys[],
+ const char *valid_ends);
+
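A possible use of the new delimiter-aware parser, assuming ALLOW_EXPERIMENTAL_API is defined; the argument string and key names are made up:

#include <stdio.h>
#include <rte_kvargs.h>

/* Parse only the key/value prefix of a devargs-like string, stopping at
 * the first '/'. */
static void
parse_prefix(void)
{
    static const char * const keys[] = { "iface", "queues", NULL };
    struct rte_kvargs *kv;

    kv = rte_kvargs_parse_delim("iface=eth0,queues=4/ignored=tail",
                                keys, "/");
    if (kv == NULL)
        return;
    printf("parsed %u pair(s)\n", rte_kvargs_count(kv, NULL));
    rte_kvargs_free(kv);
}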
+/**
* Free a rte_kvargs structure
*
* Free a rte_kvargs structure previously allocated with
@@ -121,6 +153,32 @@ int rte_kvargs_process(const struct rte_kvargs *kvlist,
unsigned rte_kvargs_count(const struct rte_kvargs *kvlist,
const char *key_match);
+/**
+ * Generic kvarg handler for string comparison.
+ *
+ * This function can be used for a generic string comparison processing
+ * on a list of kvargs.
+ *
+ * @param key
+ * kvarg pair key.
+ *
+ * @param value
+ * kvarg pair value.
+ *
+ * @param opaque
+ * Opaque pointer to a string.
+ *
+ * @return
+ * 0 if the strings match.
+ * !0 otherwise or on error.
+ *
+ * Unlike strcmp, comparison ordering is not preserved: so that
+ * rte_kvargs_process stops processing on a mismatch, a negative value is
+ * returned even when strcmp would have returned a positive one.
+ */
+__rte_experimental
+int rte_kvargs_strcmp(const char *key, const char *value, void *opaque);
+
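rte_kvargs_strcmp() is meant to be plugged into rte_kvargs_process() as a generic match test. A hedged sketch; the kvargs string and key names are illustrative only.

#include <rte_kvargs.h>

/* Returns 0 when every "class" kvarg equals "eth", -1 otherwise. */
static int
class_is_eth(const char *devargs_str)
{
    static const char * const keys[] = { "class", "name", NULL };
    struct rte_kvargs *kv = rte_kvargs_parse(devargs_str, keys);
    char wanted[] = "eth";
    int ret;

    if (kv == NULL)
        return -1;
    ret = rte_kvargs_process(kv, "class", rte_kvargs_strcmp, wanted);
    rte_kvargs_free(kv);
    return ret;
}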
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map
index 2030ec46..8f4b4e3f 100644
--- a/lib/librte_kvargs/rte_kvargs_version.map
+++ b/lib/librte_kvargs/rte_kvargs_version.map
@@ -8,3 +8,11 @@ DPDK_2.0 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ rte_kvargs_parse_delim;
+ rte_kvargs_strcmp;
+
+} DPDK_2.0;
diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c
index 66330203..1fdec68e 100644
--- a/lib/librte_latencystats/rte_latencystats.c
+++ b/lib/librte_latencystats/rte_latencystats.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2017 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
*/
#include <unistd.h>
@@ -46,7 +46,7 @@ struct rte_latency_stats {
static struct rte_latency_stats *glob_stats;
struct rxtx_cbs {
- struct rte_eth_rxtx_callback *cb;
+ const struct rte_eth_rxtx_callback *cb;
};
static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
@@ -201,7 +201,6 @@ rte_latencystats_init(uint64_t app_samp_intvl,
uint16_t pid;
uint16_t qid;
struct rxtx_cbs *cbs = NULL;
- const uint16_t nb_ports = rte_eth_dev_count();
const char *ptr_strings[NUM_LATENCY_STATS] = {0};
const struct rte_memzone *mz = NULL;
const unsigned int flags = 0;
@@ -234,7 +233,7 @@ rte_latencystats_init(uint64_t app_samp_intvl,
}
/** Register Rx/Tx callbacks */
- for (pid = 0; pid < nb_ports; pid++) {
+ RTE_ETH_FOREACH_DEV(pid) {
struct rte_eth_dev_info dev_info;
rte_eth_dev_info_get(pid, &dev_info);
for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
@@ -266,10 +265,10 @@ rte_latencystats_uninit(void)
uint16_t qid;
int ret = 0;
struct rxtx_cbs *cbs = NULL;
- const uint16_t nb_ports = rte_eth_dev_count();
+ const struct rte_memzone *mz = NULL;
/** De register Rx/Tx callbacks */
- for (pid = 0; pid < nb_ports; pid++) {
+ RTE_ETH_FOREACH_DEV(pid) {
struct rte_eth_dev_info dev_info;
rte_eth_dev_info_get(pid, &dev_info);
for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
@@ -290,6 +289,11 @@ rte_latencystats_uninit(void)
}
}
+ /* free up the memzone */
+ mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
+ if (mz)
+ rte_memzone_free(mz);
+
return 0;
}
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 367568ae..e2b98a25 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -6,13 +6,12 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_mbuf.a
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lrte_eal -lrte_mempool
EXPORT_MAP := rte_mbuf_version.map
-LIBABIVER := 3
+LIBABIVER := 4
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c rte_mbuf_pool_ops.c
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 869c17c1..45ffb0db 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,7 +2,6 @@
# Copyright(c) 2017 Intel Corporation
version = 3
-allow_experimental_apis = true
sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
deps += ['mempool']
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 091d388d..e714c5a5 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -33,21 +33,6 @@
#include <rte_memcpy.h>
/*
- * ctrlmbuf constructor, given as a callback function to
- * rte_mempool_obj_iter() or rte_mempool_create()
- */
-void
-rte_ctrlmbuf_init(struct rte_mempool *mp,
- __attribute__((unused)) void *opaque_arg,
- void *_m,
- __attribute__((unused)) unsigned i)
-{
- struct rte_mbuf *m = _m;
- rte_pktmbuf_init(mp, opaque_arg, _m, i);
- m->ol_flags |= CTRL_MBUF_FLAG;
-}
-
-/*
* pktmbuf pool constructor, given as a callback function to
* rte_mempool_create(), or called directly if using
* rte_mempool_create_empty()/rte_mempool_populate()
@@ -122,7 +107,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
}
/* Helper to create a mbuf pool with given mempool ops name*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name)
@@ -405,6 +390,9 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP";
+ case PKT_TX_TUNNEL_VXLAN_GPE: return "PKT_TX_TUNNEL_VXLAN_GPE";
+ case PKT_TX_TUNNEL_IP: return "PKT_TX_TUNNEL_IP";
+ case PKT_TX_TUNNEL_UDP: return "PKT_TX_TUNNEL_UDP";
case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD";
default: return NULL;
@@ -439,6 +427,12 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
"PKT_TX_TUNNEL_NONE" },
{ PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK,
"PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
{ PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
{ PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL },
};
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 62740254..9ce5d76d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -201,15 +201,44 @@ extern "C" {
/**
* Bits 45:48 used for the tunnel type.
- * When doing Tx offload like TSO or checksum, the HW needs to configure the
- * tunnel type into the HW descriptors.
+ * The tunnel type must be specified for TSO or checksum on the inner part
+ * of tunnel packets.
+ * These flags can be used with PKT_TX_TCP_SEG for TSO, or PKT_TX_xxx_CKSUM.
+ * The mbuf fields for inner and outer header lengths are required:
+ * outer_l2_len, outer_l3_len, l2_len, l3_len, l4_len and tso_segsz for TSO.
*/
#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
#define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
-/**< TX packet with MPLS-in-UDP RFC 7510 header. */
+/** TX packet with MPLS-in-UDP RFC 7510 header. */
#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
+#define PKT_TX_TUNNEL_VXLAN_GPE (0x6ULL << 45)
+/**
+ * Generic IP encapsulated tunnel type, used for TSO and checksum offload.
+ * It can be used for tunnels that are not standard or not listed above.
+ * It is preferred to use specific tunnel flags like PKT_TX_TUNNEL_GRE
+ * or PKT_TX_TUNNEL_IPIP if possible.
+ * The ethdev must be configured with DEV_TX_OFFLOAD_IP_TNL_TSO.
+ * Outer and inner checksums are done according to the existing flags like
+ * PKT_TX_xxx_CKSUM.
+ * Specific tunnel headers that contain payload length, sequence id
+ * or checksum are not expected to be updated.
+ */
+#define PKT_TX_TUNNEL_IP (0xDULL << 45)
+/**
+ * Generic UDP encapsulated tunnel type, used for TSO and checksum offload.
+ * UDP tunnel type implies outer IP layer.
+ * It can be used for tunnels that are not standard or not listed above.
+ * It is preferred to use specific tunnel flags like PKT_TX_TUNNEL_VXLAN
+ * if possible.
+ * The ethdev must be configured with DEV_TX_OFFLOAD_UDP_TNL_TSO.
+ * Outer and inner checksums are done according to the existing flags like
+ * PKT_TX_xxx_CKSUM.
+ * Specific tunnel headers that contain payload length, sequence id
+ * or checksum are not expected to be updated.
+ */
+#define PKT_TX_TUNNEL_UDP (0xEULL << 45)
/* add new TX TUNNEL type here */
#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
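One way an application might use the new generic UDP tunnel flag to request inner TSO is sketched below. The header-length accounting follows the convention used elsewhere in DPDK for tunnel offloads, the 8-byte my_tunnel_hdr is hypothetical, and the port would have to advertise DEV_TX_OFFLOAD_UDP_TNL_TSO.

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

/* Hypothetical 8-byte tunnel header, standing in for whatever
 * non-standard encapsulation the application uses. */
struct my_tunnel_hdr { uint32_t vni; uint32_t rsvd; };

static void
request_inner_tso_over_udp_tunnel(struct rte_mbuf *m)
{
    m->ol_flags |= PKT_TX_TUNNEL_UDP |            /* generic UDP tunnel */
                   PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM |
                   PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
                   PKT_TX_TCP_CKSUM | PKT_TX_TCP_SEG;

    m->outer_l2_len = sizeof(struct ether_hdr);   /* outer Ethernet */
    m->outer_l3_len = sizeof(struct ipv4_hdr);    /* outer IPv4 */
    /* from the end of the outer IP header up to the inner IP header */
    m->l2_len = sizeof(struct udp_hdr) + sizeof(struct my_tunnel_hdr) +
                sizeof(struct ether_hdr);
    m->l3_len = sizeof(struct ipv4_hdr);          /* inner IPv4 */
    m->l4_len = sizeof(struct tcp_hdr);           /* inner TCP */
    m->tso_segsz = 1400;                          /* example segment size */
}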
@@ -226,12 +255,8 @@ extern "C" {
* - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
* PKT_TX_TCP_CKSUM)
* - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
- * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
- * to 0 in the packet
+ * - if it's IPv4, set the PKT_TX_IP_CKSUM flag
* - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- * - calculate the pseudo header checksum without taking ip_len in account,
- * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
- * rte_ipv6_phdr_cksum() that can be used as helpers.
*/
#define PKT_TX_TCP_SEG (1ULL << 50)
@@ -244,9 +269,6 @@ extern "C" {
* - fill l2_len and l3_len in mbuf
* - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
* - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
- * - calculate the pseudo header checksum and set it in the L4 header (only
- * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
- * For SCTP, set the crc field to 0.
*/
#define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
#define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
@@ -258,7 +280,6 @@ extern "C" {
* Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
* also be set by the application, although a PMD will only check
* PKT_TX_IP_CKSUM.
- * - set the IP checksum field in the packet to 0
* - fill the mbuf offload information: l2_len, l3_len
*/
#define PKT_TX_IP_CKSUM (1ULL << 54)
@@ -288,10 +309,8 @@ extern "C" {
/**
* Offload the IP checksum of an external header in the hardware. The
- * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh
- * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the
- * packet must be set to 0.
- * - set the outer IP checksum field in the packet to 0
+ * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
+ * a PMD will only check PKT_TX_OUTER_IP_CKSUM.
* - fill the mbuf offload information: outer_l2_len, outer_l3_len
*/
#define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
@@ -326,13 +345,13 @@ extern "C" {
PKT_TX_MACSEC | \
PKT_TX_SEC_OFFLOAD)
-#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
+/**
+ * Mbuf having an external buffer attached. shinfo in mbuf must be filled.
+ */
+#define EXT_ATTACHED_MBUF (1ULL << 61)
#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
-/* Use final bit of flags to indicate a control mbuf */
-#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
-
/** Alignment constraint of mbuf private area. */
#define RTE_MBUF_PRIV_ALIGN 8
@@ -569,8 +588,27 @@ struct rte_mbuf {
/** Sequence number. See also rte_reorder_insert(). */
uint32_t seqn;
+ /** Shared data for external buffer attached to mbuf. See
+ * rte_pktmbuf_attach_extbuf().
+ */
+ struct rte_mbuf_ext_shared_info *shinfo;
+
} __rte_cache_aligned;
+/**
+ * Function typedef of callback to free externally attached buffer.
+ */
+typedef void (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
+
+/**
+ * Shared data at the end of an external buffer.
+ */
+struct rte_mbuf_ext_shared_info {
+ rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback function */
+ void *fcb_opaque; /**< Free callback argument */
+ rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
+};
+
/**< Maximum number of nb_segs allowed. */
#define RTE_MBUF_MAX_NB_SEGS UINT16_MAX
@@ -691,14 +729,53 @@ rte_mbuf_to_baddr(struct rte_mbuf *md)
}
/**
- * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
+ * Return the starting address of the private data area embedded in
+ * the given mbuf.
+ *
+ * Note that no check is made to ensure that a private data area
+ * actually exists in the supplied mbuf.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @return
+ * The starting address of the private data area of the given mbuf.
+ */
+static inline void * __rte_experimental
+rte_mbuf_to_priv(struct rte_mbuf *m)
+{
+ return RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
+}
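A short sketch of the private-area helper in use; struct pkt_meta and the pool parameters are invented, and rte_mbuf_to_priv() is experimental in this release, so ALLOW_EXPERIMENTAL_API must be defined.

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_lcore.h>

/* Per-mbuf scratch area, reserved via priv_size at pool creation. */
struct pkt_meta { uint64_t rx_tsc; uint32_t flow_id; };

static struct rte_mbuf *
alloc_with_meta(void)
{
    struct rte_mempool *mp = rte_pktmbuf_pool_create("meta_pool", 8192, 256,
            sizeof(struct pkt_meta),              /* priv_size */
            RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    struct rte_mbuf *m = (mp != NULL) ? rte_pktmbuf_alloc(mp) : NULL;

    if (m != NULL) {
        struct pkt_meta *meta = rte_mbuf_to_priv(m);

        meta->rx_tsc = rte_rdtsc();
        meta->flow_id = 0;
    }
    return m;
}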
+
+/**
+ * Returns TRUE if given mbuf is cloned by mbuf indirection, or FALSE
+ * otherwise.
+ *
+ * If a mbuf has its data in another mbuf and references it by mbuf
+ * indirection, this mbuf can be defined as a cloned mbuf.
+ */
+#define RTE_MBUF_CLONED(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF)
+
+/**
+ * Deprecated.
+ * Use RTE_MBUF_CLONED().
+ */
+#define RTE_MBUF_INDIRECT(mb) RTE_MBUF_CLONED(mb)
+
+/**
+ * Returns TRUE if given mbuf has an external buffer, or FALSE otherwise.
+ *
+ * An external buffer is a user-provided anonymous buffer.
*/
-#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF)
+#define RTE_MBUF_HAS_EXTBUF(mb) ((mb)->ol_flags & EXT_ATTACHED_MBUF)
/**
* Returns TRUE if given mbuf is direct, or FALSE otherwise.
+ *
+ * If a mbuf embeds its own data after the rte_mbuf structure, this mbuf
+ * can be defined as a direct mbuf.
*/
-#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb))
+#define RTE_MBUF_DIRECT(mb) \
+ (!((mb)->ol_flags & (IND_ATTACHED_MBUF | EXT_ATTACHED_MBUF)))
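Taken together, the three macros classify where an mbuf's data lives; a small illustrative helper:

#include <rte_mbuf.h>

static const char *
mbuf_data_location(const struct rte_mbuf *m)
{
    if (RTE_MBUF_DIRECT(m))
        return "embedded";    /* data sits right after the mbuf */
    if (RTE_MBUF_HAS_EXTBUF(m))
        return "external";    /* user-provided buffer, see shinfo */
    return "indirect";        /* cloned: data owned by another mbuf */
}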
/**
* Private data in case of pktmbuf pool.
@@ -748,7 +825,7 @@ rte_mbuf_refcnt_read(const struct rte_mbuf *m)
static inline void
rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
{
- rte_atomic16_set(&m->refcnt_atomic, new_value);
+ rte_atomic16_set(&m->refcnt_atomic, (int16_t)new_value);
}
/* internal */
@@ -778,8 +855,9 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
* reference counter can occur.
*/
if (likely(rte_mbuf_refcnt_read(m) == 1)) {
- rte_mbuf_refcnt_set(m, 1 + value);
- return 1 + value;
+ ++value;
+ rte_mbuf_refcnt_set(m, (uint16_t)value);
+ return (uint16_t)value;
}
return __rte_mbuf_refcnt_update(m, value);
@@ -824,6 +902,59 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
#endif /* RTE_MBUF_REFCNT_ATOMIC */
+/**
+ * Reads the refcnt of an external buffer.
+ *
+ * @param shinfo
+ * Shared data of the external buffer.
+ * @return
+ * Reference count number.
+ */
+static inline uint16_t
+rte_mbuf_ext_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo)
+{
+ return (uint16_t)(rte_atomic16_read(&shinfo->refcnt_atomic));
+}
+
+/**
+ * Set refcnt of an external buffer.
+ *
+ * @param shinfo
+ * Shared data of the external buffer.
+ * @param new_value
+ * Value set
+ */
+static inline void
+rte_mbuf_ext_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo,
+ uint16_t new_value)
+{
+ rte_atomic16_set(&shinfo->refcnt_atomic, (int16_t)new_value);
+}
+
+/**
+ * Add given value to refcnt of an external buffer and return its new
+ * value.
+ *
+ * @param shinfo
+ * Shared data of the external buffer.
+ * @param value
+ * Value to add/subtract
+ * @return
+ * Updated value
+ */
+static inline uint16_t
+rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
+ int16_t value)
+{
+ if (likely(rte_mbuf_ext_refcnt_read(shinfo) == 1)) {
+ ++value;
+ rte_mbuf_ext_refcnt_set(shinfo, (uint16_t)value);
+ return (uint16_t)value;
+ }
+
+ return (uint16_t)rte_atomic16_add_return(&shinfo->refcnt_atomic, value);
+}
+
/** Mbuf prefetch */
#define RTE_MBUF_PREFETCH_TO_FREE(m) do { \
if ((m) != NULL) \
@@ -915,89 +1046,6 @@ __rte_mbuf_raw_free(struct rte_mbuf *m)
rte_mbuf_raw_free(m);
}
-/* Operations on ctrl mbuf */
-
-/**
- * The control mbuf constructor.
- *
- * This function initializes some fields in an mbuf structure that are
- * not modified by the user once created (mbuf type, origin pool, buffer
- * start address, and so on). This function is given as a callback function
- * to rte_mempool_obj_iter() or rte_mempool_create() at pool creation time.
- *
- * @param mp
- * The mempool from which the mbuf is allocated.
- * @param opaque_arg
- * A pointer that can be used by the user to retrieve useful information
- * for mbuf initialization. This pointer is the opaque argument passed to
- * rte_mempool_obj_iter() or rte_mempool_create().
- * @param m
- * The mbuf to initialize.
- * @param i
- * The index of the mbuf in the pool table.
- */
-void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg,
- void *m, unsigned i);
-
-/**
- * Allocate a new mbuf (type is ctrl) from mempool *mp*.
- *
- * This new mbuf is initialized with data pointing to the beginning of
- * buffer, and with a length of zero.
- *
- * @param mp
- * The mempool from which the mbuf is allocated.
- * @return
- * - The pointer to the new mbuf on success.
- * - NULL if allocation failed.
- */
-#define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp)
-
-/**
- * Free a control mbuf back into its original mempool.
- *
- * @param m
- * The control mbuf to be freed.
- */
-#define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m)
-
-/**
- * A macro that returns the pointer to the carried data.
- *
- * The value that can be read or assigned.
- *
- * @param m
- * The control mbuf.
- */
-#define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off)
-
-/**
- * A macro that returns the length of the carried data.
- *
- * The value that can be read or assigned.
- *
- * @param m
- * The control mbuf.
- */
-#define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m)
-
-/**
- * Tests if an mbuf is a control mbuf
- *
- * @param m
- * The mbuf to be tested
- * @return
- * - True (1) if the mbuf is a control mbuf
- * - False(0) otherwise
- */
-static inline int
-rte_is_ctrlmbuf(struct rte_mbuf *m)
-{
- return !!(m->ol_flags & CTRL_MBUF_FLAG);
-}
-
-/* Operations on pkt mbuf */
-
/**
* The packet mbuf constructor.
*
@@ -1116,7 +1164,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
* - EEXIST - a memzone with the same name already exists
* - ENOMEM - no appropriate memory area found in which to create memzone
*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name);
@@ -1172,7 +1220,8 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp)
*/
static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
{
- m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+ m->data_off = (uint16_t)RTE_MIN((uint16_t)RTE_PKTMBUF_HEADROOM,
+ (uint16_t)m->buf_len);
}
/**
@@ -1281,11 +1330,161 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
}
/**
+ * Initialize shared data at the end of an external buffer before attaching
+ * to a mbuf by ``rte_pktmbuf_attach_extbuf()``. This is not a mandatory
+ * initialization but a helper function to simply spare a few bytes at the
+ * end of the buffer for shared data. If shared data is allocated
+ * separately, this should not be called; the application has to properly
+ * initialize the shared data according to its needs.
+ *
+ * The free callback and its argument are saved and the refcnt is set to 1.
+ *
+ * @warning
+ * The value of buf_len will be reduced to RTE_PTR_DIFF(shinfo, buf_addr)
+ * after this initialization. This shall be used for
+ * ``rte_pktmbuf_attach_extbuf()``.
+ *
+ * @param buf_addr
+ * The pointer to the external buffer.
+ * @param [in,out] buf_len
+ * The pointer to length of the external buffer. Input value must be
+ * larger than the size of ``struct rte_mbuf_ext_shared_info`` and
+ * padding for alignment. If not enough, this function will return NULL.
+ * Adjusted buffer length will be returned through this pointer.
+ * @param free_cb
+ * Free callback function to call when the external buffer needs to be
+ * freed.
+ * @param fcb_opaque
+ * Argument for the free callback function.
+ *
+ * @return
+ * A pointer to the initialized shared data on success, return NULL
+ * otherwise.
+ */
+static inline struct rte_mbuf_ext_shared_info *
+rte_pktmbuf_ext_shinfo_init_helper(void *buf_addr, uint16_t *buf_len,
+ rte_mbuf_extbuf_free_callback_t free_cb, void *fcb_opaque)
+{
+ struct rte_mbuf_ext_shared_info *shinfo;
+ void *buf_end = RTE_PTR_ADD(buf_addr, *buf_len);
+ void *addr;
+
+ addr = RTE_PTR_ALIGN_FLOOR(RTE_PTR_SUB(buf_end, sizeof(*shinfo)),
+ sizeof(uintptr_t));
+ if (addr <= buf_addr)
+ return NULL;
+
+ shinfo = (struct rte_mbuf_ext_shared_info *)addr;
+ shinfo->free_cb = free_cb;
+ shinfo->fcb_opaque = fcb_opaque;
+ rte_mbuf_ext_refcnt_set(shinfo, 1);
+
+ *buf_len = (uint16_t)RTE_PTR_DIFF(shinfo, buf_addr);
+ return shinfo;
+}
+
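As a usage sketch (not part of the patch; ext_buf_free_cb and make_ext_shinfo are hypothetical names), carving the shared data out of the tail of a heap-allocated buffer could look like this:

#include <stdlib.h>
#include <rte_mbuf.h>

/* Hypothetical free callback: releases the heap-allocated external buffer
 * once the last mbuf referencing it has been detached. */
static void
ext_buf_free_cb(void *addr, void *opaque __rte_unused)
{
	free(addr);
}

static struct rte_mbuf_ext_shared_info *
make_ext_shinfo(void **buf_addr, uint16_t *buf_len)
{
	*buf_len = 2048;
	*buf_addr = malloc(*buf_len);
	if (*buf_addr == NULL)
		return NULL;

	/* On success, *buf_len shrinks to the usable data area in front of
	 * the shared_info placed at the aligned end of the buffer. */
	return rte_pktmbuf_ext_shinfo_init_helper(*buf_addr, buf_len,
			ext_buf_free_cb, NULL);
}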
+/**
+ * Attach an external buffer to a mbuf.
+ *
+ * A user-managed anonymous buffer can be attached to an mbuf. When attaching
+ * it, the corresponding free callback function and its argument should be
+ * provided via shinfo. This callback function will be called once all the
+ * mbufs are detached from the buffer (refcnt becomes zero).
+ *
+ * The headroom for the attaching mbuf will be set to zero and this can be
+ * properly adjusted after attachment. For example, ``rte_pktmbuf_adj()``
+ * or ``rte_pktmbuf_reset_headroom()`` might be used.
+ *
+ * More mbufs can be attached to the same external buffer by
+ * ``rte_pktmbuf_attach()`` once the external buffer has been attached by
+ * this API.
+ *
+ * Detachment can be done by either ``rte_pktmbuf_detach_extbuf()`` or
+ * ``rte_pktmbuf_detach()``.
+ *
+ * Memory for shared data must be provided and the user must initialize all
+ * of the content properly, especially the free callback and refcnt. The
+ * pointer to the shared data will be stored in m->shinfo.
+ * ``rte_pktmbuf_ext_shinfo_init_helper`` can help to simply spare a few
+ * bytes at the end of buffer for the shared data, store free callback and
+ * its argument and set the refcnt to 1. The following is an example:
+ *
+ * struct rte_mbuf_ext_shared_info *shinfo =
+ * rte_pktmbuf_ext_shinfo_init_helper(buf_addr, &buf_len,
+ * free_cb, fcb_arg);
+ * rte_pktmbuf_attach_extbuf(m, buf_addr, buf_iova, buf_len, shinfo);
+ * rte_pktmbuf_reset_headroom(m);
+ * rte_pktmbuf_adj(m, data_len);
+ *
+ * Attaching an external buffer is quite similar to mbuf indirection in
+ * replacing the buffer address and length of a mbuf, but there are a few
+ * differences:
+ * - When an indirect mbuf is attached, refcnt of the direct mbuf would be
+ * 2 as long as the direct mbuf itself isn't freed after the attachment.
+ * In such cases, the buffer area of a direct mbuf must be read-only. But
+ * external buffer has its own refcnt and it starts from 1. Unless
+ * multiple mbufs are attached to a mbuf having an external buffer, the
+ * external buffer is writable.
+ * - There's no need to allocate buffer from a mempool. Any buffer can be
+ * attached with appropriate free callback and its IO address.
+ * - Smaller metadata is required to maintain shared data such as refcnt.
+ *
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice.
+ * Once external buffer is enabled by allowing experimental API,
+ * ``RTE_MBUF_DIRECT()`` and ``RTE_MBUF_INDIRECT()`` are no longer
+ * exclusive. A mbuf can be considered direct if it is neither indirect nor
+ * has an external buffer attached.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @param buf_addr
+ * The pointer to the external buffer.
+ * @param buf_iova
+ * IO address of the external buffer.
+ * @param buf_len
+ * The size of the external buffer.
+ * @param shinfo
+ * User-provided memory for shared data of the external buffer.
+ */
+static inline void __rte_experimental
+rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
+ rte_iova_t buf_iova, uint16_t buf_len,
+ struct rte_mbuf_ext_shared_info *shinfo)
+{
+ /* mbuf should not be read-only */
+ RTE_ASSERT(RTE_MBUF_DIRECT(m) && rte_mbuf_refcnt_read(m) == 1);
+ RTE_ASSERT(shinfo->free_cb != NULL);
+
+ m->buf_addr = buf_addr;
+ m->buf_iova = buf_iova;
+ m->buf_len = buf_len;
+
+ m->data_len = 0;
+ m->data_off = 0;
+
+ m->ol_flags |= EXT_ATTACHED_MBUF;
+ m->shinfo = shinfo;
+}
+
+/**
+ * Detach the external buffer attached to a mbuf, same as
+ * ``rte_pktmbuf_detach()``
+ *
+ * @param m
+ * The mbuf having external buffer.
+ */
+#define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
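Putting the pieces together, a minimal sketch of the attach/free lifecycle, assuming experimental APIs are enabled and that the buffer, its IO address and the shared_info have already been prepared (extbuf_lifecycle is a hypothetical name, not part of the patch):

#include <rte_mbuf.h>

static int
extbuf_lifecycle(struct rte_mempool *mp, void *buf, rte_iova_t buf_iova,
		uint16_t buf_len, struct rte_mbuf_ext_shared_info *shinfo)
{
	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);

	if (m == NULL)
		return -1;

	rte_pktmbuf_attach_extbuf(m, buf, buf_iova, buf_len, shinfo);
	rte_pktmbuf_reset_headroom(m);

	/* ... fill the buffer and hand the mbuf to a TX queue ... */

	/* Freeing the mbuf detaches it from the external buffer; when the
	 * last reference is dropped, shinfo->free_cb() releases the buffer. */
	rte_pktmbuf_free(m);
	return 0;
}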
+
+/**
* Attach packet mbuf to another packet mbuf.
*
- * After attachment we refer the mbuf we attached as 'indirect',
- * while mbuf we attached to as 'direct'.
- * The direct mbuf's reference counter is incremented.
+ * If the mbuf we are attaching to isn't a direct buffer and is attached to
+ * an external buffer, the mbuf being attached will be attached to the
+ * external buffer instead of mbuf indirection.
+ *
+ * Otherwise, the mbuf will be indirectly attached. After attachment we
+ * refer to the mbuf we attached as 'indirect', while the mbuf we attached
+ * to as 'direct'. The direct mbuf's reference counter is incremented.
*
* Right now, not supported:
* - attachment for already indirect mbuf (e.g. - mi has to be direct).
@@ -1299,19 +1498,20 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
*/
static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
{
- struct rte_mbuf *md;
-
RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
rte_mbuf_refcnt_read(mi) == 1);
- /* if m is not direct, get the mbuf that embeds the data */
- if (RTE_MBUF_DIRECT(m))
- md = m;
- else
- md = rte_mbuf_from_indirect(m);
+ if (RTE_MBUF_HAS_EXTBUF(m)) {
+ rte_mbuf_ext_refcnt_update(m->shinfo, 1);
+ mi->ol_flags = m->ol_flags;
+ mi->shinfo = m->shinfo;
+ } else {
+ /* if m is not direct, get the mbuf that embeds the data */
+ rte_mbuf_refcnt_update(rte_mbuf_from_indirect(m), 1);
+ mi->priv_size = m->priv_size;
+ mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
+ }
- rte_mbuf_refcnt_update(md, 1);
- mi->priv_size = m->priv_size;
mi->buf_iova = m->buf_iova;
mi->buf_addr = m->buf_addr;
mi->buf_len = m->buf_len;
@@ -1327,7 +1527,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
mi->next = NULL;
mi->pkt_len = mi->data_len;
mi->nb_segs = 1;
- mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
mi->packet_type = m->packet_type;
mi->timestamp = m->timestamp;
@@ -1336,12 +1535,52 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
}
/**
- * Detach an indirect packet mbuf.
+ * @internal used by rte_pktmbuf_detach().
+ *
+ * Decrement the reference counter of the external buffer. When the
+ * reference counter becomes 0, the buffer is freed by pre-registered
+ * callback.
+ */
+static inline void
+__rte_pktmbuf_free_extbuf(struct rte_mbuf *m)
+{
+ RTE_ASSERT(RTE_MBUF_HAS_EXTBUF(m));
+ RTE_ASSERT(m->shinfo != NULL);
+
+ if (rte_mbuf_ext_refcnt_update(m->shinfo, -1) == 0)
+ m->shinfo->free_cb(m->buf_addr, m->shinfo->fcb_opaque);
+}
+
+/**
+ * @internal used by rte_pktmbuf_detach().
+ *
+ * Decrement the direct mbuf's reference counter. When the reference
+ * counter becomes 0, the direct mbuf is freed.
+ */
+static inline void
+__rte_pktmbuf_free_direct(struct rte_mbuf *m)
+{
+ struct rte_mbuf *md;
+
+ RTE_ASSERT(RTE_MBUF_INDIRECT(m));
+
+ md = rte_mbuf_from_indirect(m);
+
+ if (rte_mbuf_refcnt_update(md, -1) == 0) {
+ md->next = NULL;
+ md->nb_segs = 1;
+ rte_mbuf_refcnt_set(md, 1);
+ rte_mbuf_raw_free(md);
+ }
+}
+
+/**
+ * Detach a packet mbuf from external buffer or direct buffer.
*
+ * - decrement refcnt and free the external/direct buffer if refcnt
+ * becomes zero.
* - restore original mbuf address and length values.
* - reset pktmbuf data and data_len to their default values.
- * - decrement the direct mbuf's reference counter. When the
- * reference counter becomes 0, the direct mbuf is freed.
*
* All other fields of the given packet mbuf will be left intact.
*
@@ -1350,12 +1589,17 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
*/
static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
{
- struct rte_mbuf *md = rte_mbuf_from_indirect(m);
struct rte_mempool *mp = m->pool;
- uint32_t mbuf_size, buf_len, priv_size;
+ uint32_t mbuf_size, buf_len;
+ uint16_t priv_size;
+
+ if (RTE_MBUF_HAS_EXTBUF(m))
+ __rte_pktmbuf_free_extbuf(m);
+ else
+ __rte_pktmbuf_free_direct(m);
priv_size = rte_pktmbuf_priv_size(mp);
- mbuf_size = sizeof(struct rte_mbuf) + priv_size;
+ mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size);
buf_len = rte_pktmbuf_data_room_size(mp);
m->priv_size = priv_size;
@@ -1365,13 +1609,6 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
rte_pktmbuf_reset_headroom(m);
m->data_len = 0;
m->ol_flags = 0;
-
- if (rte_mbuf_refcnt_update(md, -1) == 0) {
- md->next = NULL;
- md->nb_segs = 1;
- rte_mbuf_refcnt_set(md, 1);
- rte_mbuf_raw_free(md);
- }
}
/**
@@ -1395,7 +1632,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
if (likely(rte_mbuf_refcnt_read(m) == 1)) {
- if (RTE_MBUF_INDIRECT(m))
+ if (!RTE_MBUF_DIRECT(m))
rte_pktmbuf_detach(m);
if (m->next != NULL) {
@@ -1407,7 +1644,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
} else if (__rte_mbuf_refcnt_update(m, -1) == 0) {
- if (RTE_MBUF_INDIRECT(m))
+ if (!RTE_MBUF_DIRECT(m))
rte_pktmbuf_detach(m);
if (m->next != NULL) {
@@ -1690,7 +1927,10 @@ static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m,
if (unlikely(len > rte_pktmbuf_headroom(m)))
return NULL;
- m->data_off -= len;
+ /* NB: elaborating the subtraction like this instead of using
+ * -= allows us to ensure the result type is uint16_t
+ * avoiding compiler warnings on gcc 8.1 at least */
+ m->data_off = (uint16_t)(m->data_off - len);
m->data_len = (uint16_t)(m->data_len + len);
m->pkt_len = (m->pkt_len + len);
@@ -1750,8 +1990,11 @@ static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len)
if (unlikely(len > m->data_len))
return NULL;
+ /* NB: elaborating the addition like this instead of using
+ * += allows us to ensure the result type is uint16_t
+ * avoiding compiler warnings on gcc 8.1 at least */
m->data_len = (uint16_t)(m->data_len - len);
- m->data_off += len;
+ m->data_off = (uint16_t)(m->data_off + len);
m->pkt_len = (m->pkt_len - len);
return (char *)m->buf_addr + m->data_off;
}
@@ -1863,8 +2106,11 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
cur_tail = rte_pktmbuf_lastseg(head);
cur_tail->next = tail;
- /* accumulate number of segments and total length. */
- head->nb_segs += tail->nb_segs;
+ /* accumulate number of segments and total length.
+ * NB: elaborating the addition like this instead of using
+ * += allows us to ensure the result type is uint16_t
+ * avoiding compiler warnings on gcc 8.1 at least */
+ head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
head->pkt_len += tail->pkt_len;
/* pkt_len is only set in the head */
@@ -1894,7 +2140,11 @@ rte_validate_tx_offload(const struct rte_mbuf *m)
return 0;
if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
- inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+ /* NB: elaborating the addition like this instead of using
+ * += gives the result uint64_t type instead of int,
+ * avoiding compiler warnings on gcc 8.1 at least */
+ inner_l3_offset = inner_l3_offset + m->outer_l2_len +
+ m->outer_l3_len;
/* Headers are fragmented */
if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
@@ -1939,7 +2189,7 @@ rte_validate_tx_offload(const struct rte_mbuf *m)
static inline int
rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
{
- int seg_len, copy_len;
+ size_t seg_len, copy_len;
struct rte_mbuf *m;
struct rte_mbuf *m_next;
char *buffer;
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c b/lib/librte_mbuf/rte_mbuf_pool_ops.c
index 48cc3420..5722976f 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.c
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c
@@ -9,7 +9,7 @@
#include <rte_errno.h>
#include <rte_mbuf_pool_ops.h>
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -23,7 +23,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name)
RTE_MEMPOOL_OPS_NAMESIZE, SOCKET_ID_ANY, 0);
if (mz == NULL)
return -rte_errno;
- strncpy(mz->addr, ops_name, strlen(ops_name));
+ strcpy(mz->addr, ops_name);
return 0;
} else if (strcmp(mz->addr, ops_name) == 0) {
return 0;
@@ -35,7 +35,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name)
return -EEXIST;
}
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -46,7 +46,7 @@ rte_mbuf_platform_mempool_ops(void)
return mz->addr;
}
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -62,12 +62,12 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name)
return -rte_errno;
}
- strncpy(mz->addr, ops_name, strlen(ops_name));
+ strcpy(mz->addr, ops_name);
return 0;
}
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -79,7 +79,7 @@ rte_mbuf_user_mempool_ops(void)
}
/* Return mbuf pool ops name */
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void)
{
/* User defined mempool ops takes the priority */
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.h b/lib/librte_mbuf/rte_mbuf_pool_ops.h
index ebf5bf0f..7ed95a49 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.h
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.h
@@ -12,9 +12,6 @@
* These APIs are for configuring the mbuf pool ops names to be largely used by
* rte_pktmbuf_pool_create(). However, this can also be used to set and inquire
* the best mempool ops available.
- *
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
*/
#include <rte_compat.h>
@@ -34,7 +31,7 @@ extern "C" {
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name);
/**
@@ -46,7 +43,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name);
* - On success, platform pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void);
/**
@@ -60,7 +57,7 @@ rte_mbuf_platform_mempool_ops(void);
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name);
/**
@@ -72,7 +69,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name);
* - On success, user pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void);
/**
@@ -87,7 +84,7 @@ rte_mbuf_user_mempool_ops(void);
* @return
* returns preferred mbuf pool ops name
*/
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void);
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
index 1feefacc..d7835e28 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.c
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -65,6 +65,9 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype)
case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU";
case RTE_PTYPE_TUNNEL_ESP: return "TUNNEL_ESP";
case RTE_PTYPE_TUNNEL_L2TP: return "TUNNEL_L2TP";
+ case RTE_PTYPE_TUNNEL_VXLAN_GPE: return "TUNNEL_VXLAN_GPE";
+ case RTE_PTYPE_TUNNEL_MPLS_IN_UDP: return "TUNNEL_MPLS_IN_UDP";
+ case RTE_PTYPE_TUNNEL_MPLS_IN_GRE: return "TUNNEL_MPLS_IN_GRE";
default: return "TUNNEL_UNKNOWN";
}
}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index b9a33811..01acc66e 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -423,6 +423,53 @@ extern "C" {
*/
#define RTE_PTYPE_TUNNEL_L2TP 0x0000a000
/**
+ * VXLAN-GPE (VXLAN Generic Protocol Extension) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=4790>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=4790>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN_GPE 0x0000b000
+/**
+ * MPLS-in-GRE tunneling packet type (RFC 4023).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol'=0x8847>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol'=0x8848>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol'=0x8847>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol'=0x8848>
+ */
+#define RTE_PTYPE_TUNNEL_MPLS_IN_GRE 0x0000c000
+/**
+ * MPLS-in-UDP tunneling packet type (RFC 7510).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=6635>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=6635>
+ */
+#define RTE_PTYPE_TUNNEL_MPLS_IN_UDP 0x0000d000
+/**
* Mask of tunneling packet types.
*/
#define RTE_PTYPE_TUNNEL_MASK 0x0000f000
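A short sketch (hypothetical helper name, not part of the patch) of how an application might combine the mask with the new tunnel values defined above:

#include <rte_mbuf_ptype.h>

static const char *
tunnel_kind(uint32_t packet_type)
{
	switch (packet_type & RTE_PTYPE_TUNNEL_MASK) {
	case 0:
		return "not tunnelled";
	case RTE_PTYPE_TUNNEL_VXLAN_GPE:
		return "VXLAN-GPE";
	case RTE_PTYPE_TUNNEL_MPLS_IN_UDP:
	case RTE_PTYPE_TUNNEL_MPLS_IN_GRE:
		return "MPLS";
	default:
		/* fall back to the generic name, e.g. "TUNNEL_GTPU" */
		return rte_get_ptype_tunnel_name(packet_type);
	}
}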
@@ -606,9 +653,9 @@ extern "C" {
#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPV6 packet.
*/
#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index d418dcb8..cae68db8 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -1,7 +1,6 @@
DPDK_2.0 {
global:
- rte_ctrlmbuf_init;
rte_get_rx_ol_flag_name;
rte_get_tx_ol_flag_name;
rte_mbuf_sanity_check;
@@ -36,7 +35,7 @@ DPDK_16.11 {
} DPDK_2.1;
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_mbuf_best_mempool_ops;
@@ -45,5 +44,4 @@ EXPERIMENTAL {
rte_mbuf_set_user_mempool_ops;
rte_mbuf_user_mempool_ops;
rte_pktmbuf_pool_create_by_ops;
-
} DPDK_16.11;
diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c
index e147dd1f..702c01d3 100644
--- a/lib/librte_member/rte_member.c
+++ b/lib/librte_member/rte_member.c
@@ -297,10 +297,7 @@ rte_member_reset(const struct rte_member_setsum *setsum)
}
}
-RTE_INIT(librte_member_init_log);
-
-static void
-librte_member_init_log(void)
+RTE_INIT(librte_member_init_log)
{
librte_member_logtype = rte_log_register("lib.member");
if (librte_member_logtype >= 0)
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 24e735a3..20bf63fb 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -7,15 +7,20 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_mempool.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 3
+LIBABIVER := 5
+
+# memseg walk is not yet part of stable API
+CFLAGS += -DALLOW_EXPERIMENTAL_API
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 7a4f3dae..38d7ae89 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,7 +1,21 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 2
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+allow_experimental_apis = true
+
+extra_flags = []
+
+foreach flag: extra_flags
+ if cc.has_argument(flag)
+ cflags += flag
+ endif
+endforeach
+
+version = 5
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+ 'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
deps += ['ring']
+
+# memseg walk is not yet part of stable API
+allow_experimental_apis = true
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 54f7f4ba..03e6b5f7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -3,6 +3,7 @@
* Copyright(c) 2016 6WIND S.A.
*/
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
@@ -98,8 +99,31 @@ static unsigned optimize_object_size(unsigned obj_size)
return new_obj_size * RTE_MEMPOOL_ALIGN;
}
+static int
+find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
+{
+ size_t *min = arg;
+
+ if (msl->page_sz < *min)
+ *min = msl->page_sz;
+
+ return 0;
+}
+
+static size_t
+get_min_page_size(void)
+{
+ size_t min_pagesz = SIZE_MAX;
+
+ rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+
+ return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+}
+
+
static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
+mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
+ void *obj, rte_iova_t iova)
{
struct rte_mempool_objhdr *hdr;
struct rte_mempool_objtlr *tlr __rte_unused;
@@ -116,9 +140,6 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
tlr = __mempool_get_trailer(obj);
tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
#endif
-
- /* enqueue in ring */
- rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
}
/* call obj_cb() for each mempool element */
@@ -204,92 +225,6 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}
-
-/*
- * Calculate maximum amount of memory required to store given number of objects.
- */
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- unsigned int flags)
-{
- size_t obj_per_page, pg_num, pg_sz;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;
-
- if (total_elt_sz == 0)
- return 0;
-
- if (pg_shift == 0)
- return total_elt_sz * elt_num;
-
- pg_sz = (size_t)1 << pg_shift;
- obj_per_page = pg_sz / total_elt_sz;
- if (obj_per_page == 0)
- return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;
-
- pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
- return pg_num << pg_shift;
-}
-
-/*
- * Calculate how much memory would be actually required with the
- * given memory footprint to store required number of elements.
- */
-ssize_t
-rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags)
-{
- uint32_t elt_cnt = 0;
- rte_iova_t start, end;
- uint32_t iova_idx;
- size_t pg_sz = (size_t)1 << pg_shift;
- unsigned int mask;
-
- mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
- if ((flags & mask) == mask)
- /* alignment need one additional object */
- elt_num += 1;
-
- /* if iova is NULL, assume contiguous memory */
- if (iova == NULL) {
- start = 0;
- end = pg_sz * pg_num;
- iova_idx = pg_num;
- } else {
- start = iova[0];
- end = iova[0] + pg_sz;
- iova_idx = 1;
- }
- while (elt_cnt < elt_num) {
-
- if (end - start >= total_elt_sz) {
- /* enough contiguous memory, add an object */
- start += total_elt_sz;
- elt_cnt++;
- } else if (iova_idx < pg_num) {
- /* no room to store one obj, add a page */
- if (end == iova[iova_idx]) {
- end += pg_sz;
- } else {
- start = iova[iova_idx];
- end = iova[iova_idx] + pg_sz;
- }
- iova_idx++;
-
- } else {
- /* no more page, return how many elements fit */
- return -(size_t)elt_cnt;
- }
- }
-
- return (size_t)iova_idx << pg_shift;
-}
-
/* free a memchunk allocated with rte_memzone_reserve() */
static void
rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
@@ -323,6 +258,21 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
}
}
+static int
+mempool_ops_alloc_once(struct rte_mempool *mp)
+{
+ int ret;
+
+ /* create the internal ring if not already done */
+ if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+ ret = rte_mempool_ops_alloc(mp);
+ if (ret != 0)
+ return ret;
+ mp->flags |= MEMPOOL_F_POOL_CREATED;
+ }
+ return 0;
+}
+
/* Add objects in the pool, using a physically contiguous memory
* zone. Return the number of objects added, or a negative value
* on error.
@@ -332,51 +282,19 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque)
{
- unsigned total_elt_sz;
- unsigned int mp_capa_flags;
unsigned i = 0;
size_t off;
struct rte_mempool_memhdr *memhdr;
int ret;
- /* create the internal ring if not already done */
- if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
- ret = rte_mempool_ops_alloc(mp);
- if (ret != 0)
- return ret;
- mp->flags |= MEMPOOL_F_POOL_CREATED;
- }
-
- /* Notify memory area to mempool */
- ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len);
- if (ret != -ENOTSUP && ret < 0)
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
return ret;
/* mempool is already populated */
if (mp->populated_size >= mp->size)
return -ENOSPC;
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-
- /* Get mempool capabilities */
- mp_capa_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_capa_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
-
- /* update mempool capabilities */
- mp->flags |= mp_capa_flags;
-
- /* Detect pool area has sufficient space for elements */
- if (mp_capa_flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
- if (len < total_elt_sz * mp->size) {
- RTE_LOG(ERR, MEMPOOL,
- "pool area %" PRIx64 " not enough\n",
- (uint64_t)len);
- return -ENOSPC;
- }
- }
-
memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
if (memhdr == NULL)
return -ENOMEM;
@@ -388,89 +306,34 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;
- if (mp_capa_flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
- /* align object start address to a multiple of total_elt_sz */
- off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
- else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+ if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
- while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
- off += mp->header_size;
- if (iova == RTE_BAD_IOVA)
- mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_IOVA);
- else
- mempool_add_elem(mp, (char *)vaddr + off, iova + off);
- off += mp->elt_size + mp->trailer_size;
- i++;
+ if (off > len) {
+ ret = -EINVAL;
+ goto fail;
}
+ i = rte_mempool_ops_populate(mp, mp->size - mp->populated_size,
+ (char *)vaddr + off,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off),
+ len - off, mempool_add_elem, NULL);
+
/* not enough room to store one object */
- if (i == 0)
- return -EINVAL;
+ if (i == 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next);
mp->nb_mem_chunks++;
return i;
-}
-
-int
-rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque)
-{
- return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque);
-}
-
-/* Add objects in the pool, using a table of physical pages. Return the
- * number of objects added, or a negative value on error.
- */
-int
-rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- uint32_t i, n;
- int ret, cnt = 0;
- size_t pg_sz = (size_t)1 << pg_shift;
-
- /* mempool must not be populated */
- if (mp->nb_mem_chunks != 0)
- return -EEXIST;
-
- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
- return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA,
- pg_num * pg_sz, free_cb, opaque);
-
- for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {
-
- /* populate with the largest group of contiguous pages */
- for (n = 1; (i + n) < pg_num &&
- iova[i + n - 1] + pg_sz == iova[i + n]; n++)
- ;
-
- ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz,
- iova[i], n * pg_sz, free_cb, opaque);
- if (ret < 0) {
- rte_mempool_free_memchunks(mp);
- return ret;
- }
- /* no need to call the free callback for next chunks */
- free_cb = NULL;
- cnt += ret;
- }
- return cnt;
-}
-int
-rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift,
- free_cb, opaque);
+fail:
+ rte_free(memhdr);
+ return ret;
}
/* Populate the mempool with a virtual area. Return the number of
@@ -485,16 +348,13 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
size_t off, phys_len;
int ret, cnt = 0;
- /* mempool must not be populated */
- if (mp->nb_mem_chunks != 0)
- return -EEXIST;
/* address and len must be page-aligned */
if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr)
return -EINVAL;
if (RTE_ALIGN_CEIL(len, pg_sz) != len)
return -EINVAL;
- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+ if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA,
len, free_cb, opaque);
@@ -544,39 +404,94 @@ rte_mempool_populate_default(struct rte_mempool *mp)
unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
- size_t size, total_elt_sz, align, pg_sz, pg_shift;
+ ssize_t mem_size;
+ size_t align, pg_sz, pg_shift;
rte_iova_t iova;
unsigned mz_id, n;
- unsigned int mp_flags;
int ret;
+ bool no_contig, try_contig, no_pageshift;
+
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
- /* Get mempool capabilities */
- mp_flags = 0;
- ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
- if ((ret < 0) && (ret != -ENOTSUP))
- return ret;
+ no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG;
- /* update mempool capabilities */
- mp->flags |= mp_flags;
+ /*
+ * the following section calculates page shift and page size values.
+ *
+ * these values impact the result of calc_mem_size operation, which
+ * returns the amount of memory that should be allocated to store the
+ * desired number of objects. when not zero, it allocates more memory
+ * for the padding between objects, to ensure that an object does not
+ * cross a page boundary. in other words, page size/shift are to be set
+ * to zero if mempool elements won't care about page boundaries.
+ * there are several considerations for page size and page shift here.
+ *
+ * if we don't need our mempools to have physically contiguous objects,
+ * then just set page shift and page size to 0, because the user has
+ * indicated that there's no need to care about anything.
+ *
+ * if we do need contiguous objects, there is also an option to reserve
+ * the entire mempool memory as one contiguous block of memory, in
+ * which case the page shift and alignment wouldn't matter as well.
+ *
+ * if we require contiguous objects, but not necessarily the entire
+ * mempool reserved space to be contiguous, then there are two options.
+ *
+ * if our IO addresses are virtual, not actual physical (IOVA as VA
+ * case), then no page shift needed - our memory allocation will give us
+ * contiguous IO memory as far as the hardware is concerned, so
+ * act as if we're getting contiguous memory.
+ *
+ * if our IO addresses are physical, we may get memory from bigger
+ * pages, or we might get memory from smaller pages, and how much of it
+ * we require depends on whether we want bigger or smaller pages.
+ * However, requesting each and every memory size is too much work, so
+ * what we'll do instead is walk through the page sizes available, pick
+ * the smallest one and set up page shift to match that one. We will be
+ * wasting some space this way, but it's much nicer than looping around
+ * trying to reserve each and every page size.
+ *
+ * However, since size calculation will produce page-aligned sizes, it
+ * makes sense to first try and see if we can reserve the entire memzone
+ * in one contiguous chunk as well (otherwise we might end up wasting a
+ * 1G page on a 10MB memzone). If we fail to get enough contiguous
+ * memory, then we'll go and reserve space page-by-page.
+ */
+ no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA;
+ try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages();
- if (rte_eal_has_hugepages()) {
- pg_shift = 0; /* not needed, zone is physically contiguous */
+ if (no_pageshift) {
pg_sz = 0;
- align = RTE_CACHE_LINE_SIZE;
+ pg_shift = 0;
+ } else if (try_contig) {
+ pg_sz = get_min_page_size();
+ pg_shift = rte_bsf32(pg_sz);
} else {
pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- align = pg_sz;
}
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
- size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
- mp->flags);
+ size_t min_chunk_size;
+ unsigned int flags;
+
+ if (try_contig || no_pageshift)
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+ 0, &min_chunk_size, &align);
+ else
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+ pg_shift, &min_chunk_size, &align);
+
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }
ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -585,30 +500,70 @@ rte_mempool_populate_default(struct rte_mempool *mp)
goto fail;
}
- mz = rte_memzone_reserve_aligned(mz_name, size,
- mp->socket_id, mz_flags, align);
- /* not enough memory, retry with the biggest zone we have */
- if (mz == NULL)
+ flags = mz_flags;
+
+ /* if we're trying to reserve contiguous memory, add appropriate
+ * memzone flag.
+ */
+ if (try_contig)
+ flags |= RTE_MEMZONE_IOVA_CONTIG;
+
+ mz = rte_memzone_reserve_aligned(mz_name, mem_size,
+ mp->socket_id, flags, align);
+
+ /* if we were trying to allocate contiguous memory, failed and
+ * minimum required contiguous chunk fits minimum page, adjust
+ * memzone size to the page size, and try again.
+ */
+ if (mz == NULL && try_contig && min_chunk_size <= pg_sz) {
+ try_contig = false;
+ flags &= ~RTE_MEMZONE_IOVA_CONTIG;
+
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+ pg_shift, &min_chunk_size, &align);
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }
+
+ mz = rte_memzone_reserve_aligned(mz_name, mem_size,
+ mp->socket_id, flags, align);
+ }
+ /* don't try reserving with 0 size if we were asked to reserve
+ * IOVA-contiguous memory.
+ */
+ if (min_chunk_size < (size_t)mem_size && mz == NULL) {
+ /* not enough memory, retry with the biggest zone we
+ * have
+ */
mz = rte_memzone_reserve_aligned(mz_name, 0,
- mp->socket_id, mz_flags, align);
+ mp->socket_id, flags,
+ RTE_MAX(pg_sz, align));
+ }
if (mz == NULL) {
ret = -rte_errno;
goto fail;
}
- if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+ if (mz->len < min_chunk_size) {
+ rte_memzone_free(mz);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ if (no_contig)
iova = RTE_BAD_IOVA;
else
iova = mz->iova;
- if (rte_eal_has_hugepages())
+ if (no_pageshift || try_contig)
ret = rte_mempool_populate_iova(mp, mz->addr,
iova, mz->len,
rte_mempool_memchunk_mz_free,
(void *)(uintptr_t)mz);
else
ret = rte_mempool_populate_virt(mp, mz->addr,
- mz->len, pg_sz,
+ RTE_ALIGN_FLOOR(mz->len, pg_sz), pg_sz,
rte_mempool_memchunk_mz_free,
(void *)(uintptr_t)mz);
if (ret < 0) {
@@ -625,16 +580,18 @@ rte_mempool_populate_default(struct rte_mempool *mp)
}
/* return the memory size required for mempool objects in anonymous mem */
-static size_t
+static ssize_t
get_anon_size(const struct rte_mempool *mp)
{
- size_t size, total_elt_sz, pg_sz, pg_shift;
+ ssize_t size;
+ size_t pg_sz, pg_shift;
+ size_t min_chunk_size;
+ size_t align;
pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
- total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
- mp->flags);
+ size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+ &min_chunk_size, &align);
return size;
}
@@ -644,25 +601,45 @@ static void
rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr,
void *opaque)
{
- munmap(opaque, get_anon_size(memhdr->mp));
+ ssize_t size;
+
+ /*
+ * Calculate size since memhdr->len has contiguous chunk length
+ * which may be smaller if anon map is split into many contiguous
+ * chunks. Result must be the same as we calculated on populate.
+ */
+ size = get_anon_size(memhdr->mp);
+ if (size < 0)
+ return;
+
+ munmap(opaque, size);
}
/* populate the mempool with an anonymous mapping */
int
rte_mempool_populate_anon(struct rte_mempool *mp)
{
- size_t size;
+ ssize_t size;
int ret;
char *addr;
/* mempool is already populated, error */
- if (!STAILQ_EMPTY(&mp->mem_list)) {
+ if ((!STAILQ_EMPTY(&mp->mem_list)) || mp->nb_mem_chunks != 0) {
rte_errno = EINVAL;
return 0;
}
- /* get chunk of virtually continuous memory */
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
size = get_anon_size(mp);
+ if (size < 0) {
+ rte_errno = -size;
+ return 0;
+ }
+
+ /* get chunk of virtually continuous memory */
addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) {
@@ -795,6 +772,12 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+ /* asked for zero items */
+ if (n == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
/* asked cache too big */
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
@@ -944,66 +927,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
return NULL;
}
-/*
- * Create the mempool over already allocated chunk of memory.
- * That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create()
- * behavior.
- */
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift)
-{
- struct rte_mempool *mp = NULL;
- int ret;
-
- /* no virtual address supplied, use rte_mempool_create() */
- if (vaddr == NULL)
- return rte_mempool_create(name, n, elt_size, cache_size,
- private_data_size, mp_init, mp_init_arg,
- obj_init, obj_init_arg, socket_id, flags);
-
- /* check that we have both VA and PA */
- if (iova == NULL) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- /* Check that pg_shift parameter is valid. */
- if (pg_shift > MEMPOOL_PG_SHIFT_MAX) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
- private_data_size, socket_id, flags);
- if (mp == NULL)
- return NULL;
-
- /* call the mempool priv initializer */
- if (mp_init)
- mp_init(mp, mp_init_arg);
-
- ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift,
- NULL, NULL);
- if (ret < 0 || ret != (int)mp->size)
- goto fail;
-
- /* call the object initializers */
- if (obj_init)
- rte_mempool_obj_iter(mp, obj_init, obj_init_arg);
-
- return mp;
-
- fail:
- rte_mempool_free(mp);
- return NULL;
-}
-
/* Return the number of entries in the mempool */
unsigned int
rte_mempool_avail_count(const struct rte_mempool *mp)
@@ -1132,6 +1055,36 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp,
#endif
}
+void
+rte_mempool_contig_blocks_check_cookies(const struct rte_mempool *mp,
+ void * const *first_obj_table_const, unsigned int n, int free)
+{
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ struct rte_mempool_info info;
+ const size_t total_elt_sz =
+ mp->header_size + mp->elt_size + mp->trailer_size;
+ unsigned int i, j;
+
+ rte_mempool_ops_get_info(mp, &info);
+
+ for (i = 0; i < n; ++i) {
+ void *first_obj = first_obj_table_const[i];
+
+ for (j = 0; j < info.contig_block_size; ++j) {
+ void *obj;
+
+ obj = (void *)((uintptr_t)first_obj + j * total_elt_sz);
+ rte_mempool_check_cookies(mp, &obj, 1, free);
+ }
+ }
+#else
+ RTE_SET_USED(mp);
+ RTE_SET_USED(first_obj_table_const);
+ RTE_SET_USED(n);
+ RTE_SET_USED(free);
+#endif
+}
+
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
static void
mempool_obj_audit(struct rte_mempool *mp, __rte_unused void *opaque,
@@ -1197,6 +1150,7 @@ void
rte_mempool_dump(FILE *f, struct rte_mempool *mp)
{
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ struct rte_mempool_info info;
struct rte_mempool_debug_stats sum;
unsigned lcore_id;
#endif
@@ -1238,6 +1192,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
/* sum and dump statistics */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ rte_mempool_ops_get_info(mp, &info);
memset(&sum, 0, sizeof(sum));
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
sum.put_bulk += mp->stats[lcore_id].put_bulk;
@@ -1246,6 +1201,8 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs;
+ sum.get_success_blks += mp->stats[lcore_id].get_success_blks;
+ sum.get_fail_blks += mp->stats[lcore_id].get_fail_blks;
}
fprintf(f, " stats:\n");
fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk);
@@ -1254,6 +1211,11 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs);
fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs);
+ if (info.contig_block_size > 0) {
+ fprintf(f, " get_success_blks=%"PRIu64"\n",
+ sum.get_success_blks);
+ fprintf(f, " get_fail_blks=%"PRIu64"\n", sum.get_fail_blks);
+ }
#else
fprintf(f, " no statistics available\n");
#endif
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 8b1b7f7e..7c9cd9a2 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -70,6 +70,10 @@ struct rte_mempool_debug_stats {
uint64_t get_success_objs; /**< Objects successfully allocated. */
uint64_t get_fail_bulk; /**< Failed allocation number. */
uint64_t get_fail_objs; /**< Objects that failed to be allocated. */
+ /** Successful allocation number of contiguous blocks. */
+ uint64_t get_success_blks;
+ /** Failed allocation number of contiguous blocks. */
+ uint64_t get_fail_blks;
} __rte_cache_aligned;
#endif
@@ -190,6 +194,20 @@ struct rte_mempool_memhdr {
};
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Additional information about the mempool
+ *
+ * The structure is cache-line aligned to avoid ABI breakages in
+ * a number of cases when something small is added.
+ */
+struct rte_mempool_info {
+ /** Number of objects in the contiguous block */
+ unsigned int contig_block_size;
+} __rte_cache_aligned;
+
+/**
* The RTE mempool structure.
*/
struct rte_mempool {
@@ -244,25 +262,8 @@ struct rte_mempool {
#define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/
#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
#define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */
-#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
-/**
- * This capability flag is advertised by a mempool handler, if the whole
- * memory area containing the objects must be physically contiguous.
- * Note: This flag should not be passed by application.
- */
-#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040
-/**
- * This capability flag is advertised by a mempool handler. Used for a case
- * where mempool driver wants object start address(vaddr) aligned to block
- * size(/ total element size).
- *
- * Note:
- * - This flag should not be passed by application.
- * Flag used for mempool driver only.
- * - Mempool driver must also set MEMPOOL_F_CAPA_PHYS_CONTIG flag along with
- * MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS.
- */
-#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080
+#define MEMPOOL_F_NO_IOVA_CONTIG 0x0020 /**< Don't need IOVA contiguous objs. */
+#define MEMPOOL_F_NO_PHYS_CONTIG MEMPOOL_F_NO_IOVA_CONTIG /* deprecated */
/**
* @internal When debug is enabled, store some statistics.
@@ -282,8 +283,16 @@ struct rte_mempool {
mp->stats[__lcore_id].name##_bulk += 1; \
} \
} while(0)
+#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do { \
+ unsigned int __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) { \
+ mp->stats[__lcore_id].name##_blks += n; \
+ mp->stats[__lcore_id].name##_bulk += 1; \
+ } \
+ } while (0)
#else
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
+#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0)
#endif
/**
@@ -351,6 +360,38 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp,
#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0)
#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @internal Check contiguous object blocks and update cookies or panic.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param first_obj_table_const
+ * Pointer to a table of void * pointers (first object of the contiguous
+ * object blocks).
+ * @param n
+ * Number of contiguous object blocks.
+ * @param free
+ * - 0: object is supposed to be allocated, mark it as free
+ * - 1: object is supposed to be free, mark it as allocated
+ * - 2: just check that cookie is valid (free or allocated)
+ */
+void rte_mempool_contig_blocks_check_cookies(const struct rte_mempool *mp,
+ void * const *first_obj_table_const, unsigned int n, int free);
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+#define __mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \
+ free) \
+ rte_mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \
+ free)
+#else
+#define __mempool_contig_blocks_check_cookies(mp, first_obj_table_const, n, \
+ free) \
+ do {} while (0)
+#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
+
#define RTE_MEMPOOL_OPS_NAMESIZE 32 /**< Max length of ops struct name. */
/**
@@ -383,21 +424,135 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
void **obj_table, unsigned int n);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Dequeue a number of contiguous object blocks from the external pool.
+ */
+typedef int (*rte_mempool_dequeue_contig_blocks_t)(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n);
+
+/**
* Return the number of available objects in the external pool.
*/
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);
/**
- * Get the mempool capabilities.
+ * Calculate memory size required to store given number of objects.
+ *
+ * If mempool objects are not required to be IOVA-contiguous
+ * (the flag MEMPOOL_F_NO_IOVA_CONTIG is set), min_chunk_size defines
+ * virtually contiguous chunk size. Otherwise, if mempool objects must
+ * be IOVA-contiguous (the flag MEMPOOL_F_NO_IOVA_CONTIG is clear),
+ * min_chunk_size defines IOVA-contiguous chunk size.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location for required memory chunk alignment.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate memory size required to store given number of
+ * objects.
+ *
+ * If page boundaries may be ignored, it is just a product of total
+ * object size including header and trailer and number of objects.
+ * Otherwise, it is a number of pages required to store given number of
+ * objects without crossing page boundary.
+ *
+ * Note that if object size is bigger than page size, then it assumes
+ * that pages are grouped in subsets of physically contiguous pages big
+ * enough to store at least one object.
+ *
+ * Minimum size of memory chunk is a maximum of the page size and total
+ * element size.
+ *
+ * Required memory chunk alignment is a maximum of page size and cache
+ * line size.
*/
-typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
- unsigned int *flags);
+ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
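As an illustration of the callback contract (my_calc_mem_size is a hypothetical driver function, not part of the patch), a driver that needs all objects in a single contiguous chunk could implement it roughly as follows:

#include <rte_mempool.h>

static ssize_t
my_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		uint32_t pg_shift __rte_unused,
		size_t *min_chunk_size, size_t *align)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	size_t mem_size = total_elt_sz * obj_num;

	/* require the whole reservation to be one contiguous chunk */
	*min_chunk_size = mem_size;
	*align = RTE_CACHE_LINE_SIZE;

	return (ssize_t)mem_size;
}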
/**
- * Notify new memory area to mempool.
+ * Function to be called for each populated object.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] opaque
+ * An opaque pointer passed to iterator.
+ * @param[in] vaddr
+ * Object virtual address.
+ * @param[in] iova
+ * IO virtual address (IOVA) of the object, or RTE_BAD_IOVA.
*/
-typedef int (*rte_mempool_ops_register_memory_area_t)
-(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);
+typedef void (rte_mempool_populate_obj_cb_t)(struct rte_mempool *mp,
+ void *opaque, void *vaddr, rte_iova_t iova);
+
+/**
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * Populated objects should be enqueued to the pool, e.g. using
+ * rte_mempool_ops_enqueue_bulk().
+ *
+ * If the given IO address is unknown (iova = RTE_BAD_IOVA),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] max_objs
+ * Maximum number of objects to be populated.
+ * @param[in] vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param[in] iova
+ * The IO address
+ * @param[in] len
+ * The length of memory in bytes.
+ * @param[in] obj_cb
+ * Callback function to be executed for each populated object.
+ * @param[in] obj_cb_arg
+ * An opaque pointer passed to the callback function.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg);
+
+/**
+ * Default way to populate memory pool object using provided memory
+ * chunk: just slice objects one by one.
+ */
+int rte_mempool_op_populate_default(struct rte_mempool *mp,
+ unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg);
+
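A sketch of a custom populate callback (my_populate is a hypothetical name, not part of the patch) that slices the chunk into objects, reports each one via obj_cb and enqueues it, along the lines of what the default implementation is described to do:

#include <rte_mempool.h>

static int
my_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
		rte_iova_t iova, size_t len,
		rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
	size_t total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	size_t off = 0;
	unsigned int i;
	void *obj;

	for (i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
		off += mp->header_size;       /* obj_cb expects the object start */
		obj = (char *)vaddr + off;
		obj_cb(mp, obj_cb_arg, obj,
			(iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : iova + off);
		rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
		off += mp->elt_size + mp->trailer_size;
	}

	return (int)i;
}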
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get some additional information about a mempool.
+ */
+typedef int (*rte_mempool_get_info_t)(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
@@ -408,13 +563,23 @@ struct rte_mempool_ops {
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
/**
- * Get the mempool capabilities
+ * Optional callback to calculate memory size required to
+ * store specified number of objects.
+ */
+ rte_mempool_calc_mem_size_t calc_mem_size;
+ /**
+ * Optional callback to populate mempool objects using
+ * provided memory chunk.
+ */
+ rte_mempool_populate_t populate;
+ /**
+ * Get mempool info
*/
- rte_mempool_get_capabilities_t get_capabilities;
+ rte_mempool_get_info_t get_info;
/**
- * Notify new memory area to mempool
+ * Dequeue a number of contiguous object blocks.
*/
- rte_mempool_ops_register_memory_area_t register_memory_area;
+ rte_mempool_dequeue_contig_blocks_t dequeue_contig_blocks;
} __rte_cache_aligned;
#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -493,6 +658,30 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
}
/**
+ * @internal Wrapper for mempool_ops dequeue_contig_blocks callback.
+ *
+ * @param[in] mp
+ * Pointer to the memory pool.
+ * @param[out] first_obj_table
+ * Pointer to a table of void * pointers (first objects).
+ * @param[in] n
+ * Number of blocks to get.
+ * @return
+ * - 0: Success; got n objects.
+ * - <0: Error; code of dequeue function.
+ */
+static inline int
+rte_mempool_ops_dequeue_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+ RTE_ASSERT(ops->dequeue_contig_blocks != NULL);
+ return ops->dequeue_contig_blocks(mp, first_obj_table, n);
+}
+
+/**
* @internal wrapper for mempool_ops enqueue callback.
*
* @param mp
@@ -527,41 +716,74 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);
/**
- * @internal wrapper for mempool_ops get_capabilities callback.
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate the size of memory required to store a specified number
+ * of objects.
*
- * @param mp [in]
+ * @param[in] mp
* Pointer to the memory pool.
- * @param flags [out]
- * Pointer to the mempool flags.
+ * @param[in] obj_num
+ * Number of objects.
+ * @param[in] pg_shift
+ * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ * Location for minimum size of the memory chunk which may be used to
+ * store memory pool objects.
+ * @param[out] align
+ * Location for required memory chunk alignment.
* @return
- * - 0: Success; The mempool driver has advertised his pool capabilities in
- * flags param.
- * - -ENOTSUP - doesn't support get_capabilities ops (valid case).
- * - Otherwise, pool create fails.
+ * Required memory size aligned at page boundary.
*/
-int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags);
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align);
+
/**
- * @internal wrapper for mempool_ops register_memory_area callback.
- * API to notify the mempool handler when a new memory area is added to pool.
+ * @internal wrapper for mempool_ops populate callback.
*
- * @param mp
+ * Populate memory pool objects using provided memory chunk.
+ *
+ * @param[in] mp
+ * A pointer to the mempool structure.
+ * @param[in] max_objs
+ * Maximum number of objects to be populated.
+ * @param[in] vaddr
+ * The virtual address of memory that should be used to store objects.
+ * @param[in] iova
+ * The IO address of the memory chunk.
+ * @param[in] len
+ * The length of memory in bytes.
+ * @param[in] obj_cb
+ * Callback function to be executed for each populated object.
+ * @param[in] obj_cb_arg
+ * An opaque pointer passed to the callback function.
+ * @return
+ * The number of objects added on success.
+ * On error, no objects are populated and a negative errno is returned.
+ */
+int rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb,
+ void *obj_cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Wrapper for mempool_ops get_info callback.
+ *
+ * @param[in] mp
* Pointer to the memory pool.
- * @param vaddr
- * Pointer to the buffer virtual address.
- * @param iova
- * Pointer to the buffer IO address.
- * @param len
- * Pool size.
+ * @param[out] info
+ * Pointer to the rte_mempool_info structure.
* @return
- * - 0: Success;
- * - -ENOTSUP - doesn't support register_memory_area ops (valid error case).
- * - Otherwise, rte_mempool_populate_phys fails thus pool create fails.
+ * - 0: Success; the mempool driver supports retrieving supplementary
+ * mempool information.
+ * - -ENOTSUP: the mempool driver does not support the get_info operation
+ * (valid case).
*/
-int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
- char *vaddr, rte_iova_t iova, size_t len);
+__rte_experimental
+int rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info);
/**
* @internal wrapper for mempool_ops free callback.
@@ -710,8 +932,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
* - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
* when using rte_mempool_get() or rte_mempool_get_bulk() is
* "single-consumer". Otherwise, it is "multi-consumers".
- * - MEMPOOL_F_NO_PHYS_CONTIG: If set, allocated objects won't
- * necessarily be contiguous in physical memory.
+ * - MEMPOOL_F_NO_IOVA_CONTIG: If set, allocated objects won't
+ * necessarily be contiguous in IO memory.
* @return
* The pointer to the new allocated mempool, on success. NULL on error
* with rte_errno set appropriately. Possible rte_errno values include:
@@ -730,72 +952,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
int socket_id, unsigned flags);
/**
- * Create a new mempool named *name* in memory.
- *
- * The pool contains n elements of elt_size. Its size is set to n.
- * This function uses ``memzone_reserve()`` to allocate the mempool header
- * (and the objects if vaddr is NULL).
- * Depending on the input parameters, mempool elements can be either allocated
- * together with the mempool header, or an externally provided memory buffer
- * could be used to store mempool objects. In later case, that external
- * memory buffer can consist of set of disjoint physical pages.
- *
- * @param name
- * The name of the mempool.
- * @param n
- * The number of elements in the mempool. The optimum size (in terms of
- * memory usage) for a mempool is when n is a power of two minus one:
- * n = (2^q - 1).
- * @param elt_size
- * The size of each element.
- * @param cache_size
- * Size of the cache. See rte_mempool_create() for details.
- * @param private_data_size
- * The size of the private data appended after the mempool
- * structure. This is useful for storing some private data after the
- * mempool structure, as is done for rte_mbuf_pool for example.
- * @param mp_init
- * A function pointer that is called for initialization of the pool,
- * before object initialization. The user can initialize the private
- * data in this function if needed. This parameter can be NULL if
- * not needed.
- * @param mp_init_arg
- * An opaque pointer to data that can be used in the mempool
- * constructor function.
- * @param obj_init
- * A function called for each object at initialization of the pool.
- * See rte_mempool_create() for details.
- * @param obj_init_arg
- * An opaque pointer passed to the object constructor function.
- * @param socket_id
- * The *socket_id* argument is the socket identifier in the case of
- * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
- * constraint for the reserved zone.
- * @param flags
- * Flags controlling the behavior of the mempool. See
- * rte_mempool_create() for details.
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @return
- * The pointer to the new allocated mempool, on success. NULL on error
- * with rte_errno set appropriately. See rte_mempool_create() for details.
- */
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift);
-
-/**
* Create an empty mempool
*
* The mempool is allocated and initialized, but it is not populated: no
@@ -877,46 +1033,6 @@ int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque);
-__rte_deprecated
-int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque);
-
-/**
- * Add physical memory for objects in the pool at init
- *
- * Add a virtually contiguous memory chunk in the pool where objects can
- * be instantiated. The IO addresses corresponding to the virtual
- * area are described in iova[], pg_num, pg_shift.
- *
- * @param mp
- * A pointer to the mempool structure.
- * @param vaddr
- * The virtual address of memory that should be used to store objects.
- * @param iova
- * An array of IO addresses of each page composing the virtual area.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param free_cb
- * The callback used to free this chunk when destroying the mempool.
- * @param opaque
- * An opaque argument passed to free_cb.
- * @return
- * The number of objects added on success.
- * On error, the chunks are not added in the memory list of the
- * mempool and a negative errno is returned.
- */
-int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
-__rte_deprecated
-int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
/**
* Add virtually contiguous memory for objects in the pool at init
*
@@ -1049,22 +1165,6 @@ void
rte_mempool_cache_free(struct rte_mempool_cache *cache);
/**
- * Flush a user-owned mempool cache to the specified mempool.
- *
- * @param cache
- * A pointer to the mempool cache.
- * @param mp
- * A pointer to the mempool.
- */
-static __rte_always_inline void
-rte_mempool_cache_flush(struct rte_mempool_cache *cache,
- struct rte_mempool *mp)
-{
- rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
- cache->len = 0;
-}
-
-/**
* Get a pointer to the per-lcore default mempool cache.
*
* @param mp
@@ -1087,6 +1187,26 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
}
/**
+ * Flush a user-owned mempool cache to the specified mempool.
+ *
+ * @param cache
+ * A pointer to the mempool cache.
+ * @param mp
+ * A pointer to the mempool.
+ */
+static __rte_always_inline void
+rte_mempool_cache_flush(struct rte_mempool_cache *cache,
+ struct rte_mempool *mp)
+{
+ if (cache == NULL)
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ if (cache == NULL || cache->len == 0)
+ return;
+ rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
+ cache->len = 0;
+}
+
+/**
* @internal Put several objects back in the mempool; used internally.
* @param mp
* A pointer to the mempool structure.
@@ -1366,6 +1486,49 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p)
}
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get contiguous blocks of objects from the mempool.
+ *
+ * If a cache is enabled, consider flushing it first so that cached objects
+ * are reused as soon as possible.
+ *
+ * The application should check that the driver supports the operation
+ * by calling rte_mempool_ops_get_info() and checking that `contig_block_size`
+ * is not zero.
+ *
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param first_obj_table
+ * A pointer to a pointer to the first object in each block.
+ * @param n
+ * The number of blocks to get from the mempool.
+ * @return
+ * - 0: Success; blocks taken.
+ * - -ENOBUFS: Not enough entries in the mempool; no object is retrieved.
+ * - -EOPNOTSUPP: The mempool driver does not support block dequeue.
+ */
+static __rte_always_inline int
+__rte_experimental
+rte_mempool_get_contig_blocks(struct rte_mempool *mp,
+ void **first_obj_table, unsigned int n)
+{
+ int ret;
+
+ ret = rte_mempool_ops_dequeue_contig_blocks(mp, first_obj_table, n);
+ if (ret == 0) {
+ __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_success, n);
+ __mempool_contig_blocks_check_cookies(mp, first_obj_table, n,
+ 1);
+ } else {
+ __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, get_fail, n);
+ }
+
+ return ret;
+}
+
+/**
* Return the number of entries in the mempool.
*
* When cache is enabled, this function has to browse the length of
@@ -1439,7 +1602,7 @@ rte_mempool_empty(const struct rte_mempool *mp)
* A pointer (virtual address) to the element of the pool.
* @return
* The IO address of the elt element.
- * If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the
+ * If the mempool was created with MEMPOOL_F_NO_IOVA_CONTIG, the
* returned value is RTE_BAD_IOVA.
*/
static inline rte_iova_t
@@ -1451,13 +1614,6 @@ rte_mempool_virt2iova(const void *elt)
return hdr->iova;
}
-__rte_deprecated
-static inline phys_addr_t
-rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
-{
- return rte_mempool_virt2iova(elt);
-}
-
/**
* Check the consistency of mempool objects.
*
@@ -1527,64 +1683,6 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
struct rte_mempool_objsz *sz);
/**
- * Get the size of memory required to store mempool elements.
- *
- * Calculate the maximum amount of memory required to store given number
- * of objects. Assume that the memory buffer will be aligned at page
- * boundary.
- *
- * Note that if object size is bigger then page size, then it assumes
- * that pages are grouped in subsets of physically continuous pages big
- * enough to store at least one object.
- *
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param pg_shift
- * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
- * @param flags
- * The mempool flags.
- * @return
- * Required memory size aligned at page boundary.
- */
-size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift, unsigned int flags);
-
-/**
- * Get the size of memory required to store mempool elements.
- *
- * Calculate how much memory would be actually required with the given
- * memory footprint to store required number of objects.
- *
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param flags
- * The mempool flags.
- * @return
- * On success, the number of bytes needed to store given number of
- * objects, aligned to the given page size. If the provided memory
- * buffer is too small, return a negative value whose absolute value
- * is the actual number of elements that can be stored in that buffer.
- */
-ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags);
-
-/**
* Walk list of all memory pools
*
* @param func
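A minimal usage sketch (not part of the patch, built with ALLOW_EXPERIMENTAL_API) of the block dequeue path documented above; it assumes struct rte_mempool_info exposes the contig_block_size field mentioned in the comment and that the pool's driver implements dequeue_contig_blocks:

#include <errno.h>
#include <rte_mempool.h>

static int
get_blocks_example(struct rte_mempool *mp, void **first_objs, unsigned int n)
{
	struct rte_mempool_info info;

	/* A driver advertises block dequeue via a non-zero contig_block_size. */
	if (rte_mempool_ops_get_info(mp, &info) != 0 ||
			info.contig_block_size == 0)
		return -EOPNOTSUPP;

	/* Flush this lcore's cache first so cached objects are reused soon. */
	rte_mempool_cache_flush(NULL, mp);

	return rte_mempool_get_contig_blocks(mp, first_objs, n);
}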
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 0732255c..a27e1fa5 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -57,8 +57,10 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
- ops->get_capabilities = h->get_capabilities;
- ops->register_memory_area = h->register_memory_area;
+ ops->calc_mem_size = h->calc_mem_size;
+ ops->populate = h->populate;
+ ops->get_info = h->get_info;
+ ops->dequeue_contig_blocks = h->dequeue_contig_blocks;
rte_spinlock_unlock(&rte_mempool_ops_table.sl);
@@ -97,32 +99,57 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
return ops->get_count(mp);
}
-/* wrapper to get external mempool capabilities. */
+/* wrapper to calculate memory size required to store objects */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ if (ops->calc_mem_size == NULL)
+ return rte_mempool_op_calc_mem_size_default(mp, obj_num,
+ pg_shift, min_chunk_size, align);
+
+ return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
+/* wrapper to populate memory pool objects using provided memory chunk */
int
-rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
- unsigned int *flags)
+rte_mempool_ops_populate(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb,
+ void *obj_cb_arg)
{
struct rte_mempool_ops *ops;
ops = rte_mempool_get_ops(mp->ops_index);
- RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP);
- return ops->get_capabilities(mp, flags);
+ if (ops->populate == NULL)
+ return rte_mempool_op_populate_default(mp, max_objs, vaddr,
+ iova, len, obj_cb,
+ obj_cb_arg);
+
+ return ops->populate(mp, max_objs, vaddr, iova, len, obj_cb,
+ obj_cb_arg);
}
-/* wrapper to notify new memory area to external mempool */
+/* wrapper to get additional mempool info */
int
-rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
- rte_iova_t iova, size_t len)
+rte_mempool_ops_get_info(const struct rte_mempool *mp,
+ struct rte_mempool_info *info)
{
struct rte_mempool_ops *ops;
ops = rte_mempool_get_ops(mp->ops_index);
- RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP);
- return ops->register_memory_area(mp, vaddr, iova, len);
+ RTE_FUNC_PTR_OR_ERR_RET(ops->get_info, -ENOTSUP);
+ return ops->get_info(mp, info);
}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
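For driver authors, a sketch (hypothetical names, placeholder callback bodies) of an ops table under the reworked interface; leaving calc_mem_size and populate NULL makes the wrappers above fall back to the rte_mempool_op_*_default() helpers:

#include <errno.h>
#include <rte_common.h>
#include <rte_mempool.h>

static int
example_alloc(struct rte_mempool *mp)
{
	mp->pool_data = NULL;	/* placeholder: a real driver allocates a ring/stack here */
	return 0;
}

static void
example_free(struct rte_mempool *mp)
{
	RTE_SET_USED(mp);
}

static int
example_enqueue(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
{
	RTE_SET_USED(mp); RTE_SET_USED(obj_table); RTE_SET_USED(n);
	return -ENOBUFS;	/* placeholder */
}

static int
example_dequeue(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	RTE_SET_USED(mp); RTE_SET_USED(obj_table); RTE_SET_USED(n);
	return -ENOBUFS;	/* placeholder */
}

static unsigned int
example_get_count(const struct rte_mempool *mp)
{
	RTE_SET_USED(mp);
	return 0;
}

static const struct rte_mempool_ops example_ops = {
	.name = "example",
	.alloc = example_alloc,
	.free = example_free,
	.enqueue = example_enqueue,
	.dequeue = example_dequeue,
	.get_count = example_get_count,
	/* calc_mem_size and populate left NULL: the wrappers fall back to
	 * rte_mempool_op_calc_mem_size_default()/_populate_default(). */
	/* get_info and dequeue_contig_blocks are optional as well. */
};

MEMPOOL_REGISTER_OPS(example_ops);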
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 00000000..4e2bfc82
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+ uint32_t obj_num, uint32_t pg_shift,
+ size_t *min_chunk_size, size_t *align)
+{
+ size_t total_elt_sz;
+ size_t obj_per_page, pg_num, pg_sz;
+ size_t mem_size;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+ if (total_elt_sz == 0) {
+ mem_size = 0;
+ } else if (pg_shift == 0) {
+ mem_size = total_elt_sz * obj_num;
+ } else {
+ pg_sz = (size_t)1 << pg_shift;
+ obj_per_page = pg_sz / total_elt_sz;
+ if (obj_per_page == 0) {
+ /*
+ * Note that if object size is bigger than page size,
+ * then it is assumed that pages are grouped in subsets
+ * of physically contiguous pages big enough to store
+ * at least one object.
+ */
+ mem_size =
+ RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num;
+ } else {
+ pg_num = (obj_num + obj_per_page - 1) / obj_per_page;
+ mem_size = pg_num << pg_shift;
+ }
+ }
+
+ *min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+ *align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);
+
+ return mem_size;
+}
+
+int
+rte_mempool_op_populate_default(struct rte_mempool *mp, unsigned int max_objs,
+ void *vaddr, rte_iova_t iova, size_t len,
+ rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
+{
+ size_t total_elt_sz;
+ size_t off;
+ unsigned int i;
+ void *obj;
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
+ off += mp->header_size;
+ obj = (char *)vaddr + off;
+ obj_cb(mp, obj_cb_arg, obj,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
+ rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+ off += mp->elt_size + mp->trailer_size;
+ }
+
+ return i;
+}
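As a worked example of the default sizing above (illustrative numbers only): with total_elt_sz = 2240 bytes and pg_shift = 12 (4 KiB pages), obj_per_page = 4096 / 2240 = 1, so 1000 objects need pg_num = 1000 pages and mem_size = 1000 << 12 = 4096000 bytes; min_chunk_size = max(4096, 2240) = 4096 and, with a 64-byte cache line, align = max(64, 4096) = 4096.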
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f91..17cbca46 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -8,9 +8,6 @@ DPDK_2.0 {
rte_mempool_list_dump;
rte_mempool_lookup;
rte_mempool_walk;
- rte_mempool_xmem_create;
- rte_mempool_xmem_size;
- rte_mempool_xmem_usage;
local: *;
};
@@ -34,8 +31,6 @@ DPDK_16.07 {
rte_mempool_ops_table;
rte_mempool_populate_anon;
rte_mempool_populate_default;
- rte_mempool_populate_phys;
- rte_mempool_populate_phys_tab;
rte_mempool_populate_virt;
rte_mempool_register_ops;
rte_mempool_set_ops_byname;
@@ -45,9 +40,21 @@ DPDK_16.07 {
DPDK_17.11 {
global:
- rte_mempool_ops_get_capabilities;
- rte_mempool_ops_register_memory_area;
rte_mempool_populate_iova;
- rte_mempool_populate_iova_tab;
} DPDK_16.07;
+
+DPDK_18.05 {
+ global:
+
+ rte_mempool_contig_blocks_check_cookies;
+ rte_mempool_op_calc_mem_size_default;
+ rte_mempool_op_populate_default;
+
+} DPDK_17.11;
+
+EXPERIMENTAL {
+ global:
+
+ rte_mempool_ops_get_info;
+};
diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile
index 3b80c6ac..2dc071e8 100644
--- a/lib/librte_meter/Makefile
+++ b/lib/librte_meter/Makefile
@@ -16,7 +16,7 @@ LDLIBS += -lrte_eal
EXPORT_MAP := rte_meter_version.map
-LIBABIVER := 1
+LIBABIVER := 2
#
# all source are stored in SRCS-y
diff --git a/lib/librte_meter/meson.build b/lib/librte_meter/meson.build
index 646fd4d4..947bc19e 100644
--- a/lib/librte_meter/meson.build
+++ b/lib/librte_meter/meson.build
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+version = 2
sources = files('rte_meter.c')
headers = files('rte_meter.h')
diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c
index 332c5190..473f69ab 100644
--- a/lib/librte_meter/rte_meter.c
+++ b/lib/librte_meter/rte_meter.c
@@ -31,61 +31,82 @@ rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_
}
int
-rte_meter_srtcm_config(struct rte_meter_srtcm *m, struct rte_meter_srtcm_params *params)
+rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
+ struct rte_meter_srtcm_params *params)
{
- uint64_t hz;
+ uint64_t hz = rte_get_tsc_hz();
/* Check input parameters */
- if ((m == NULL) || (params == NULL)) {
- return -1;
- }
+ if ((p == NULL) ||
+ (params == NULL) ||
+ (params->cir == 0) ||
+ ((params->cbs == 0) && (params->ebs == 0)))
+ return -EINVAL;
- if ((params->cir == 0) || ((params->cbs == 0) && (params->ebs == 0))) {
- return -2;
- }
+ /* Initialize srTCM run-time structure */
+ p->cbs = params->cbs;
+ p->ebs = params->ebs;
+ rte_meter_get_tb_params(hz, params->cir, &p->cir_period,
+ &p->cir_bytes_per_period);
+
+ return 0;
+}
+
+int
+rte_meter_srtcm_config(struct rte_meter_srtcm *m,
+ struct rte_meter_srtcm_profile *p)
+{
+ /* Check input parameters */
+ if ((m == NULL) || (p == NULL))
+ return -EINVAL;
/* Initialize srTCM run-time structure */
- hz = rte_get_tsc_hz();
m->time = rte_get_tsc_cycles();
- m->tc = m->cbs = params->cbs;
- m->te = m->ebs = params->ebs;
- rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period);
-
- RTE_LOG(INFO, METER, "Low level srTCM config: \n"
- "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n",
- m->cir_period, m->cir_bytes_per_period);
+ m->tc = p->cbs;
+ m->te = p->ebs;
return 0;
}
int
-rte_meter_trtcm_config(struct rte_meter_trtcm *m, struct rte_meter_trtcm_params *params)
+rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
+ struct rte_meter_trtcm_params *params)
{
- uint64_t hz;
+ uint64_t hz = rte_get_tsc_hz();
/* Check input parameters */
- if ((m == NULL) || (params == NULL)) {
- return -1;
- }
+ if ((p == NULL) ||
+ (params == NULL) ||
+ (params->cir == 0) ||
+ (params->pir == 0) ||
+ (params->pir < params->cir) ||
+ (params->cbs == 0) ||
+ (params->pbs == 0))
+ return -EINVAL;
- if ((params->cir == 0) || (params->pir == 0) || (params->pir < params->cir) ||
- (params->cbs == 0) || (params->pbs == 0)) {
- return -2;
- }
+ /* Initialize trTCM run-time structure */
+ p->cbs = params->cbs;
+ p->pbs = params->pbs;
+ rte_meter_get_tb_params(hz, params->cir, &p->cir_period,
+ &p->cir_bytes_per_period);
+ rte_meter_get_tb_params(hz, params->pir, &p->pir_period,
+ &p->pir_bytes_per_period);
+
+ return 0;
+}
+
+int
+rte_meter_trtcm_config(struct rte_meter_trtcm *m,
+ struct rte_meter_trtcm_profile *p)
+{
+ /* Check input parameters */
+ if ((m == NULL) || (p == NULL))
+ return -EINVAL;
/* Initialize trTCM run-time structure */
- hz = rte_get_tsc_hz();
m->time_tc = m->time_tp = rte_get_tsc_cycles();
- m->tc = m->cbs = params->cbs;
- m->tp = m->pbs = params->pbs;
- rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period);
- rte_meter_get_tb_params(hz, params->pir, &m->pir_period, &m->pir_bytes_per_period);
-
- RTE_LOG(INFO, METER, "Low level trTCM config: \n"
- "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n"
- "\tPIR period = %" PRIu64 ", PIR bytes per period = %" PRIu64 "\n",
- m->cir_period, m->cir_bytes_per_period,
- m->pir_period, m->pir_bytes_per_period);
+ m->tc = p->cbs;
+ m->tp = p->pbs;
return 0;
}
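A sketch (not part of the patch) of the resulting two-step srTCM setup: the profile is computed once and may be shared by many meters, while each flow keeps only its run-time state:

#include <rte_meter.h>

static struct rte_meter_srtcm_profile sp;	/* shared across flows */
static struct rte_meter_srtcm flow_meter;	/* one per metered flow */

static int
setup_srtcm_example(void)
{
	struct rte_meter_srtcm_params params = {
		.cir = 1250000,	/* 10 Mbit/s expressed in bytes per second */
		.cbs = 2048,
		.ebs = 2048,
	};
	int ret;

	/* Step 1: build the shared profile (computes token bucket periods). */
	ret = rte_meter_srtcm_profile_config(&sp, &params);
	if (ret != 0)
		return ret;

	/* Step 2: bind each per-flow run-time context to the profile. */
	return rte_meter_srtcm_config(&flow_meter, &sp);
}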
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
index ebdc453f..58a05158 100644
--- a/lib/librte_meter/rte_meter.h
+++ b/lib/librte_meter/rte_meter.h
@@ -53,6 +53,18 @@ struct rte_meter_trtcm_params {
uint64_t pbs; /**< Peak Burst Size (PBS). Measured in bytes. */
};
+/**
+ * Internal data structure storing the srTCM configuration profile. Typically
+ * shared by multiple srTCM objects.
+ */
+struct rte_meter_srtcm_profile;
+
+/**
+ * Internal data structure storing the trTCM configuration profile. Typically
+ * shared by multiple trTCM objects.
+ */
+struct rte_meter_trtcm_profile;
+
/** Internal data structure storing the srTCM run-time context per metered traffic flow. */
struct rte_meter_srtcm;
@@ -60,38 +72,68 @@ struct rte_meter_srtcm;
struct rte_meter_trtcm;
/**
+ * srTCM profile configuration
+ *
+ * @param p
+ * Pointer to pre-allocated srTCM profile data structure
+ * @param params
+ * srTCM profile parameters
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
+ struct rte_meter_srtcm_params *params);
+
+/**
+ * trTCM profile configuration
+ *
+ * @param p
+ * Pointer to pre-allocated trTCM profile data structure
+ * @param params
+ * trTCM profile parameters
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
+ struct rte_meter_trtcm_params *params);
+
+/**
* srTCM configuration per metered traffic flow
*
* @param m
* Pointer to pre-allocated srTCM data structure
- * @param params
- * User parameters per srTCM metered traffic flow
+ * @param p
+ * srTCM profile. Needs to be valid.
* @return
* 0 upon success, error code otherwise
*/
int
rte_meter_srtcm_config(struct rte_meter_srtcm *m,
- struct rte_meter_srtcm_params *params);
+ struct rte_meter_srtcm_profile *p);
/**
* trTCM configuration per metered traffic flow
*
* @param m
* Pointer to pre-allocated trTCM data structure
- * @param params
- * User parameters per trTCM metered traffic flow
+ * @param p
+ * trTCM profile. Needs to be valid.
* @return
* 0 upon success, error code otherwise
*/
int
rte_meter_trtcm_config(struct rte_meter_trtcm *m,
- struct rte_meter_trtcm_params *params);
+ struct rte_meter_trtcm_profile *p);
/**
* srTCM color blind traffic metering
*
* @param m
* Handle to srTCM instance
+ * @param p
+ * srTCM profile specified at srTCM object creation time
* @param time
* Current CPU time stamp (measured in CPU cycles)
* @param pkt_len
@@ -101,6 +143,7 @@ rte_meter_trtcm_config(struct rte_meter_trtcm *m,
*/
static inline enum rte_meter_color
rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
+ struct rte_meter_srtcm_profile *p,
uint64_t time,
uint32_t pkt_len);
@@ -109,6 +152,8 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
*
* @param m
* Handle to srTCM instance
+ * @param p
+ * srTCM profile specified at srTCM object creation time
* @param time
* Current CPU time stamp (measured in CPU cycles)
* @param pkt_len
@@ -120,6 +165,7 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
*/
static inline enum rte_meter_color
rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
+ struct rte_meter_srtcm_profile *p,
uint64_t time,
uint32_t pkt_len,
enum rte_meter_color pkt_color);
@@ -129,6 +175,8 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
*
* @param m
* Handle to trTCM instance
+ * @param p
+ * trTCM profile specified at trTCM object creation time
* @param time
* Current CPU time stamp (measured in CPU cycles)
* @param pkt_len
@@ -138,6 +186,7 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
*/
static inline enum rte_meter_color
rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
+ struct rte_meter_trtcm_profile *p,
uint64_t time,
uint32_t pkt_len);
@@ -146,6 +195,8 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
*
* @param m
* Handle to trTCM instance
+ * @param p
+ * trTCM profile specified at trTCM object creation time
* @param time
* Current CPU time stamp (measured in CPU cycles)
* @param pkt_len
@@ -157,6 +208,7 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
*/
static inline enum rte_meter_color
rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
+ struct rte_meter_trtcm_profile *p,
uint64_t time,
uint32_t pkt_len,
enum rte_meter_color pkt_color);
@@ -166,33 +218,57 @@ rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
*
***/
+struct rte_meter_srtcm_profile {
+ uint64_t cbs;
+ /**< Upper limit for C token bucket */
+ uint64_t ebs;
+ /**< Upper limit for E token bucket */
+ uint64_t cir_period;
+ /**< Number of CPU cycles for each update of C and E token buckets */
+ uint64_t cir_bytes_per_period;
+ /**< Number of bytes to add to C and E token buckets on each update */
+};
+
/* Internal data structure storing the srTCM run-time context per metered traffic flow. */
struct rte_meter_srtcm {
uint64_t time; /* Time of latest update of C and E token buckets */
uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */
uint64_t te; /* Number of bytes currently available in the excess (E) token bucket */
- uint64_t cbs; /* Upper limit for C token bucket */
- uint64_t ebs; /* Upper limit for E token bucket */
- uint64_t cir_period; /* Number of CPU cycles for one update of C and E token buckets */
- uint64_t cir_bytes_per_period; /* Number of bytes to add to C and E token buckets on each update */
};
-/* Internal data structure storing the trTCM run-time context per metered traffic flow. */
+struct rte_meter_trtcm_profile {
+ uint64_t cbs;
+ /**< Upper limit for C token bucket */
+ uint64_t pbs;
+ /**< Upper limit for P token bucket */
+ uint64_t cir_period;
+ /**< Number of CPU cycles for one update of C token bucket */
+ uint64_t cir_bytes_per_period;
+ /**< Number of bytes to add to C token bucket on each update */
+ uint64_t pir_period;
+ /**< Number of CPU cycles for one update of P token bucket */
+ uint64_t pir_bytes_per_period;
+ /**< Number of bytes to add to P token bucket on each update */
+};
+
+/**
+ * Internal data structure storing the trTCM run-time context per metered
+ * traffic flow.
+ */
struct rte_meter_trtcm {
- uint64_t time_tc; /* Time of latest update of C token bucket */
- uint64_t time_tp; /* Time of latest update of E token bucket */
- uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */
- uint64_t tp; /* Number of bytes currently available in the peak (P) token bucket */
- uint64_t cbs; /* Upper limit for C token bucket */
- uint64_t pbs; /* Upper limit for P token bucket */
- uint64_t cir_period; /* Number of CPU cycles for one update of C token bucket */
- uint64_t cir_bytes_per_period; /* Number of bytes to add to C token bucket on each update */
- uint64_t pir_period; /* Number of CPU cycles for one update of P token bucket */
- uint64_t pir_bytes_per_period; /* Number of bytes to add to P token bucket on each update */
+ uint64_t time_tc;
+ /**< Time of latest update of C token bucket */
+ uint64_t time_tp;
+ /**< Time of latest update of P token bucket */
+ uint64_t tc;
+ /**< Number of bytes currently available in the committed (C) token bucket */
+ uint64_t tp;
+ /**< Number of bytes currently available in the peak (P) token bucket */
};
static inline enum rte_meter_color
rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
+ struct rte_meter_srtcm_profile *p,
uint64_t time,
uint32_t pkt_len)
{
@@ -200,17 +276,17 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
/* Bucket update */
time_diff = time - m->time;
- n_periods = time_diff / m->cir_period;
- m->time += n_periods * m->cir_period;
+ n_periods = time_diff / p->cir_period;
+ m->time += n_periods * p->cir_period;
/* Put the tokens overflowing from tc into te bucket */
- tc = m->tc + n_periods * m->cir_bytes_per_period;
+ tc = m->tc + n_periods * p->cir_bytes_per_period;
te = m->te;
- if (tc > m->cbs) {
- te += (tc - m->cbs);
- if (te > m->ebs)
- te = m->ebs;
- tc = m->cbs;
+ if (tc > p->cbs) {
+ te += (tc - p->cbs);
+ if (te > p->ebs)
+ te = p->ebs;
+ tc = p->cbs;
}
/* Color logic */
@@ -233,6 +309,7 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
static inline enum rte_meter_color
rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
+ struct rte_meter_srtcm_profile *p,
uint64_t time,
uint32_t pkt_len,
enum rte_meter_color pkt_color)
@@ -241,17 +318,17 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
/* Bucket update */
time_diff = time - m->time;
- n_periods = time_diff / m->cir_period;
- m->time += n_periods * m->cir_period;
+ n_periods = time_diff / p->cir_period;
+ m->time += n_periods * p->cir_period;
/* Put the tokens overflowing from tc into te bucket */
- tc = m->tc + n_periods * m->cir_bytes_per_period;
+ tc = m->tc + n_periods * p->cir_bytes_per_period;
te = m->te;
- if (tc > m->cbs) {
- te += (tc - m->cbs);
- if (te > m->ebs)
- te = m->ebs;
- tc = m->cbs;
+ if (tc > p->cbs) {
+ te += (tc - p->cbs);
+ if (te > p->ebs)
+ te = p->ebs;
+ tc = p->cbs;
}
/* Color logic */
@@ -274,6 +351,7 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
static inline enum rte_meter_color
rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
+ struct rte_meter_trtcm_profile *p,
uint64_t time,
uint32_t pkt_len)
{
@@ -282,18 +360,18 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
/* Bucket update */
time_diff_tc = time - m->time_tc;
time_diff_tp = time - m->time_tp;
- n_periods_tc = time_diff_tc / m->cir_period;
- n_periods_tp = time_diff_tp / m->pir_period;
- m->time_tc += n_periods_tc * m->cir_period;
- m->time_tp += n_periods_tp * m->pir_period;
+ n_periods_tc = time_diff_tc / p->cir_period;
+ n_periods_tp = time_diff_tp / p->pir_period;
+ m->time_tc += n_periods_tc * p->cir_period;
+ m->time_tp += n_periods_tp * p->pir_period;
- tc = m->tc + n_periods_tc * m->cir_bytes_per_period;
- if (tc > m->cbs)
- tc = m->cbs;
+ tc = m->tc + n_periods_tc * p->cir_bytes_per_period;
+ if (tc > p->cbs)
+ tc = p->cbs;
- tp = m->tp + n_periods_tp * m->pir_bytes_per_period;
- if (tp > m->pbs)
- tp = m->pbs;
+ tp = m->tp + n_periods_tp * p->pir_bytes_per_period;
+ if (tp > p->pbs)
+ tp = p->pbs;
/* Color logic */
if (tp < pkt_len) {
@@ -315,6 +393,7 @@ rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
static inline enum rte_meter_color
rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
+ struct rte_meter_trtcm_profile *p,
uint64_t time,
uint32_t pkt_len,
enum rte_meter_color pkt_color)
@@ -324,18 +403,18 @@ rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
/* Bucket update */
time_diff_tc = time - m->time_tc;
time_diff_tp = time - m->time_tp;
- n_periods_tc = time_diff_tc / m->cir_period;
- n_periods_tp = time_diff_tp / m->pir_period;
- m->time_tc += n_periods_tc * m->cir_period;
- m->time_tp += n_periods_tp * m->pir_period;
-
- tc = m->tc + n_periods_tc * m->cir_bytes_per_period;
- if (tc > m->cbs)
- tc = m->cbs;
-
- tp = m->tp + n_periods_tp * m->pir_bytes_per_period;
- if (tp > m->pbs)
- tp = m->pbs;
+ n_periods_tc = time_diff_tc / p->cir_period;
+ n_periods_tp = time_diff_tp / p->pir_period;
+ m->time_tc += n_periods_tc * p->cir_period;
+ m->time_tp += n_periods_tp * p->pir_period;
+
+ tc = m->tc + n_periods_tc * p->cir_bytes_per_period;
+ if (tc > p->cbs)
+ tc = p->cbs;
+
+ tp = m->tp + n_periods_tp * p->pir_bytes_per_period;
+ if (tp > p->pbs)
+ tp = p->pbs;
/* Color logic */
if ((pkt_color == e_RTE_METER_RED) || (tp < pkt_len)) {
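A per-packet sketch (not from the patch) with the profile now passed explicitly to the run-time check; it assumes an already configured rte_meter_srtcm context and its profile:

#include <rte_cycles.h>
#include <rte_meter.h>

static int
police_packet_example(struct rte_meter_srtcm *m,
	struct rte_meter_srtcm_profile *p, uint32_t pkt_len)
{
	enum rte_meter_color color;

	color = rte_meter_srtcm_color_blind_check(m, p,
			rte_get_tsc_cycles(), pkt_len);

	/* Drop red packets, forward green and yellow ones. */
	return (color == e_RTE_METER_RED) ? -1 : 0;
}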
diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map
index 2fd647c6..cb79f0c2 100644
--- a/lib/librte_meter/rte_meter_version.map
+++ b/lib/librte_meter/rte_meter_version.map
@@ -10,3 +10,10 @@ DPDK_2.0 {
local: *;
};
+
+DPDK_18.08 {
+ global:
+
+ rte_meter_srtcm_profile_config;
+ rte_meter_trtcm_profile_config;
+};
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index 556ae1ba..99a96b65 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -6,6 +6,7 @@
#include <sys/queue.h>
#include <rte_common.h>
+#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_metrics.h>
#include <rte_lcore.h>
@@ -95,6 +96,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
/* Some sanity checks */
if (cnt_names < 1 || names == NULL)
return -EINVAL;
+ for (idx_name = 0; idx_name < cnt_names; idx_name++)
+ if (names[idx_name] == NULL)
+ return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
if (memzone == NULL)
@@ -113,10 +117,7 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
for (idx_name = 0; idx_name < cnt_names; idx_name++) {
entry = &stats->metadata[idx_name + stats->cnt_stats];
- strncpy(entry->name, names[idx_name],
- RTE_METRICS_MAX_NAME_LEN);
- /* Enforce NULL-termination */
- entry->name[RTE_METRICS_MAX_NAME_LEN - 1] = '\0';
+ strlcpy(entry->name, names[idx_name], RTE_METRICS_MAX_NAME_LEN);
memset(entry->value, 0, sizeof(entry->value));
entry->idx_next_stat = idx_name + stats->cnt_stats + 1;
}
@@ -161,6 +162,11 @@ rte_metrics_update_values(int port_id,
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
+
+ if (key >= stats->cnt_stats) {
+ rte_spinlock_unlock(&stats->lock);
+ return -EINVAL;
+ }
idx_metric = key;
cnt_setsize = 1;
while (idx_metric < stats->cnt_stats) {
@@ -202,9 +208,8 @@ rte_metrics_get_names(struct rte_metric_name *names,
int return_value;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
@@ -215,7 +220,7 @@ rte_metrics_get_names(struct rte_metric_name *names,
return return_value;
}
for (idx_name = 0; idx_name < stats->cnt_stats; idx_name++)
- strncpy(names[idx_name].name,
+ strlcpy(names[idx_name].name,
stats->metadata[idx_name].name,
RTE_METRICS_MAX_NAME_LEN);
}
@@ -240,9 +245,9 @@ rte_metrics_get_values(int port_id,
return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
+
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
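A small sketch (not from the patch) of the call sequence these added checks protect; it assumes rte_metrics_init() has already been called in the primary process:

#include <rte_metrics.h>

static int
metrics_example(void)
{
	static const char * const names[] = { "rx_good", "tx_good" };
	uint64_t values[2] = { 0, 0 };
	int key;

	/* NULL entries in names[] are now rejected with -EINVAL. */
	key = rte_metrics_reg_names(names, 2);
	if (key < 0)
		return key;

	/* Out-of-range keys are now rejected with -EINVAL as well. */
	return rte_metrics_update_values(RTE_METRICS_GLOBAL, (uint16_t)key,
			values, 2);
}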
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 95ff5490..85e403f4 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -5,6 +5,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_net.a
+CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lrte_mbuf -lrte_eal -lrte_mempool
diff --git a/lib/librte_net/meson.build b/lib/librte_net/meson.build
index 78c0f03e..d3ea1feb 100644
--- a/lib/librte_net/meson.build
+++ b/lib/librte_net/meson.build
@@ -2,6 +2,7 @@
# Copyright(c) 2017 Intel Corporation
version = 1
+allow_experimental_apis = true
headers = files('rte_ip.h',
'rte_tcp.h',
'rte_udp.h',
diff --git a/lib/librte_net/rte_esp.h b/lib/librte_net/rte_esp.h
index 0fc99ac6..f77ec2eb 100644
--- a/lib/librte_net/rte_esp.h
+++ b/lib/librte_net/rte_esp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2017, Mellanox Technologies.
+ * Copyright 2016 Mellanox Technologies, Ltd
*/
#ifndef _RTE_ESP_H_
diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h
index 45daa911..bee2b34f 100644
--- a/lib/librte_net/rte_ether.h
+++ b/lib/librte_net/rte_ether.h
@@ -210,7 +210,7 @@ static inline void eth_random_addr(uint8_t *addr)
uint8_t *p = (uint8_t *)&rand;
rte_memcpy(addr, p, ETHER_ADDR_LEN);
- addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */
+ addr[0] &= (uint8_t)~ETHER_GROUP_ADDR; /* clear multicast bit */
addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */
}
@@ -301,6 +301,7 @@ struct vxlan_hdr {
#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */
+#define ETHER_TYPE_ETAG 0x893F /**< IEEE 802.1BR E-Tag. */
#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */
@@ -310,6 +311,31 @@ struct vxlan_hdr {
/**< VXLAN tunnel header length. */
/**
+ * VXLAN-GPE protocol header (draft-ietf-nvo3-vxlan-gpe-05).
+ * Contains the 8-bit flag, 8-bit next-protocol, 24-bit VXLAN Network
+ * Identifier and Reserved fields (16 bits and 8 bits).
+ */
+struct vxlan_gpe_hdr {
+ uint8_t vx_flags; /**< flag (8). */
+ uint8_t reserved[2]; /**< Reserved (16). */
+ uint8_t proto; /**< next-protocol (8). */
+ uint32_t vx_vni; /**< VNI (24) + Reserved (8). */
+} __attribute__((__packed__));
+
+/* VXLAN-GPE next protocol types */
+#define VXLAN_GPE_TYPE_IPV4 1 /**< IPv4 Protocol. */
+#define VXLAN_GPE_TYPE_IPV6 2 /**< IPv6 Protocol. */
+#define VXLAN_GPE_TYPE_ETH 3 /**< Ethernet Protocol. */
+#define VXLAN_GPE_TYPE_NSH 4 /**< NSH Protocol. */
+#define VXLAN_GPE_TYPE_MPLS 5 /**< MPLS Protocol. */
+#define VXLAN_GPE_TYPE_GBP 6 /**< GBP Protocol. */
+#define VXLAN_GPE_TYPE_VBNG 7 /**< vBNG Protocol. */
+
+#define ETHER_VXLAN_GPE_HLEN (sizeof(struct udp_hdr) + \
+ sizeof(struct vxlan_gpe_hdr))
+/**< VXLAN-GPE tunnel header length. */
+
+/**
* Extract VLAN tag information into mbuf
*
* Software version of VLAN stripping
@@ -324,11 +350,12 @@ static inline int rte_vlan_strip(struct rte_mbuf *m)
{
struct ether_hdr *eh
= rte_pktmbuf_mtod(m, struct ether_hdr *);
+ struct vlan_hdr *vh;
if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN))
return -1;
- struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
+ vh = (struct vlan_hdr *)(eh + 1);
m->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
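A sketch (hypothetical helper, not from the patch) of consuming the new VXLAN-GPE definitions when classifying an encapsulated packet:

#include <stdint.h>
#include <rte_ether.h>

static uint32_t
vxlan_gpe_inner_offset(const struct vxlan_gpe_hdr *gpe, uint32_t payload_off)
{
	/* The inner packet starts right after the VXLAN-GPE header. */
	uint32_t inner = payload_off + sizeof(struct vxlan_gpe_hdr);

	switch (gpe->proto) {
	case VXLAN_GPE_TYPE_ETH:	/* inner Ethernet frame follows */
	case VXLAN_GPE_TYPE_IPV4:
	case VXLAN_GPE_TYPE_IPV6:
		return inner;
	default:
		return 0;		/* unhandled next-protocol value */
	}
}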
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index f32684c6..f2a8904a 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -108,25 +108,25 @@ __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
/* workaround gcc strict-aliasing warning */
uintptr_t ptr = (uintptr_t)buf;
typedef uint16_t __attribute__((__may_alias__)) u16_p;
- const u16_p *u16 = (const u16_p *)ptr;
-
- while (len >= (sizeof(*u16) * 4)) {
- sum += u16[0];
- sum += u16[1];
- sum += u16[2];
- sum += u16[3];
- len -= sizeof(*u16) * 4;
- u16 += 4;
+ const u16_p *u16_buf = (const u16_p *)ptr;
+
+ while (len >= (sizeof(*u16_buf) * 4)) {
+ sum += u16_buf[0];
+ sum += u16_buf[1];
+ sum += u16_buf[2];
+ sum += u16_buf[3];
+ len -= sizeof(*u16_buf) * 4;
+ u16_buf += 4;
}
- while (len >= sizeof(*u16)) {
- sum += *u16;
- len -= sizeof(*u16);
- u16 += 1;
+ while (len >= sizeof(*u16_buf)) {
+ sum += *u16_buf;
+ len -= sizeof(*u16_buf);
+ u16_buf += 1;
}
/* if length is in odd bytes */
if (len == 1)
- sum += *((const uint8_t *)u16);
+ sum += *((const uint8_t *)u16_buf);
return sum;
}
@@ -222,7 +222,7 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
for (;;) {
tmp = __rte_raw_cksum(buf, seglen, 0);
if (done & 1)
- tmp = rte_bswap16(tmp);
+ tmp = rte_bswap16((uint16_t)tmp);
sum += tmp;
done += seglen;
if (done == len)
@@ -253,7 +253,7 @@ rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr)
{
uint16_t cksum;
cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr));
- return (cksum == 0xffff) ? cksum : ~cksum;
+ return (cksum == 0xffff) ? cksum : (uint16_t)~cksum;
}
/**
@@ -318,8 +318,8 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
uint32_t cksum;
uint32_t l4_len;
- l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) -
- sizeof(struct ipv4_hdr);
+ l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
+ sizeof(struct ipv4_hdr));
cksum = rte_raw_cksum(l4_hdr, l4_len);
cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
@@ -329,7 +329,7 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
if (cksum == 0)
cksum = 0xffff;
- return cksum;
+ return (uint16_t)cksum;
}
/**
@@ -375,7 +375,7 @@ rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
} psd_hdr;
- psd_hdr.proto = (ipv6_hdr->proto << 24);
+ psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24);
if (ol_flags & PKT_TX_TCP_SEG) {
psd_hdr.len = 0;
} else {
@@ -418,7 +418,7 @@ rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr)
if (cksum == 0)
cksum = 0xffff;
- return cksum;
+ return (uint16_t)cksum;
}
#ifdef __cplusplus
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
index 56a13e3c..9eb7c743 100644
--- a/lib/librte_net/rte_net.c
+++ b/lib/librte_net/rte_net.c
@@ -178,8 +178,8 @@ ip4_hlen(const struct ipv4_hdr *hdr)
}
/* parse ipv6 extended headers, update offset and return next proto */
-static uint16_t
-skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+int __rte_experimental
+rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
int *frag)
{
struct ext_hdr {
@@ -201,7 +201,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
&xh_copy);
if (xh == NULL)
- return 0;
+ return -1;
*off += (xh->len + 1) * 8;
proto = xh->next_hdr;
break;
@@ -209,7 +209,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
&xh_copy);
if (xh == NULL)
- return 0;
+ return -1;
*off += 8;
proto = xh->next_hdr;
*frag = 1;
@@ -220,7 +220,7 @@ skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
return proto;
}
}
- return 0;
+ return -1;
}
/* parse mbuf data to get packet type */
@@ -233,6 +233,7 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
uint32_t off = 0;
uint16_t proto;
+ int ret;
if (hdr_lens == NULL)
hdr_lens = &local_hdr_lens;
@@ -316,7 +317,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
off += hdr_lens->l3_len;
pkt_type |= ptype_l3_ip6(proto);
if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
- proto = skip_ip6_ext(proto, m, &off, &frag);
+ ret = rte_net_skip_ip6_ext(proto, m, &off, &frag);
+ if (ret < 0)
+ return pkt_type;
+ proto = ret;
hdr_lens->l3_len = off - hdr_lens->l2_len;
}
if (proto == 0)
@@ -449,7 +453,10 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
uint32_t prev_off;
prev_off = off;
- proto = skip_ip6_ext(proto, m, &off, &frag);
+ ret = rte_net_skip_ip6_ext(proto, m, &off, &frag);
+ if (ret < 0)
+ return pkt_type;
+ proto = ret;
hdr_lens->inner_l3_len += off - prev_off;
}
if (proto == 0)
diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h
index 0e97901f..b6ab6e1d 100644
--- a/lib/librte_net/rte_net.h
+++ b/lib/librte_net/rte_net.h
@@ -29,6 +29,33 @@ struct rte_net_hdr_lens {
};
/**
+ * Skip IPv6 header extensions.
+ *
+ * This function skips all IPv6 extension headers, advancing the offset
+ * past them and returning the protocol value of the next layer.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @param proto
+ * Protocol field of IPv6 header.
+ * @param m
+ * The packet mbuf to be parsed.
+ * @param off
+ * On input, must contain the offset to the first byte following the
+ * IPv6 header; on output, contains the offset to the first byte of the
+ * next layer (after any IPv6 extension headers).
+ * @param frag
+ * Set to 1 on output if the packet is an IPv6 fragment.
+ * @return
+ * Protocol that follows the IPv6 header.
+ * -1 if an error occurs during mbuf parsing.
+ */
+int __rte_experimental
+rte_net_skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+ int *frag);
+
+/**
* Parse an Ethernet packet to get its packet type.
*
* This function parses the network headers in mbuf data and return its
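A usage sketch (not part of the patch, requires ALLOW_EXPERIMENTAL_API) of the newly exported helper:

#include <rte_mbuf.h>
#include <rte_net.h>

static int
skip_ext_example(const struct rte_mbuf *m, uint16_t proto, uint32_t *off)
{
	int frag = 0;
	int next = rte_net_skip_ip6_ext(proto, m, off, &frag);

	if (next < 0)
		return -1;	/* truncated or unreadable extension header */
	if (frag)
		return 0;	/* fragment: no L4 header to parse */
	return next;		/* e.g. IPPROTO_TCP or IPPROTO_UDP */
}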
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 213e6fd3..26c06e7c 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -17,4 +17,5 @@ EXPERIMENTAL {
global:
rte_net_make_rarp_packet;
-} DPDK_17.05;
+ rte_net_skip_ip6_ext;
+};
diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile
index fe213ea6..94a63267 100644
--- a/lib/librte_pci/Makefile
+++ b/lib/librte_pci/Makefile
@@ -1,33 +1,5 @@
-# BSD LICENSE
-#
-# Copyright(c) 2017 6WIND S.A.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of 6WIND nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 6WIND S.A.
include $(RTE_SDK)/mk/rte.vars.mk
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 1a6d7485..530738db 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -155,9 +155,10 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
MAP_SHARED | additional_flags, fd, offset);
if (mapaddr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
- __func__, fd, requested_addr,
- (unsigned long)size, (unsigned long)offset,
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot mmap(%d, %p, 0x%zx, 0x%llx): %s (%p)\n",
+ __func__, fd, requested_addr, size,
+ (unsigned long long)offset,
strerror(errno), mapaddr);
} else
RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
@@ -174,8 +175,8 @@ pci_unmap_resource(void *requested_addr, size_t size)
/* Unmap the PCI memory resource of device */
if (munmap(requested_addr, size)) {
- RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
- __func__, requested_addr, (unsigned long)size,
+ RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n",
+ __func__, requested_addr, size,
strerror(errno));
} else
RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n",
diff --git a/lib/librte_pci/rte_pci_version.map b/lib/librte_pci/rte_pci_version.map
index 15d93d95..c0280277 100644
--- a/lib/librte_pci/rte_pci_version.map
+++ b/lib/librte_pci/rte_pci_version.map
@@ -3,12 +3,11 @@ DPDK_17.11 {
eal_parse_pci_BDF;
eal_parse_pci_DomBDF;
- rte_pci_addr_cmp;
- rte_pci_addr_parse;
- rte_pci_device_name;
pci_map_resource;
pci_unmap_resource;
rte_eal_compare_pci_addr;
+ rte_pci_addr_cmp;
+ rte_pci_addr_parse;
rte_pci_device_name;
local: *;
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 98fa752e..0ee0fa1a 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2016 Intel Corporation
+# Copyright(c) 2016-2018 Intel Corporation
include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_pdump.a
+CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
CFLAGS += -D_GNU_SOURCE
LDLIBS += -lpthread
diff --git a/lib/librte_pdump/meson.build b/lib/librte_pdump/meson.build
index 3a95eabd..be80904b 100644
--- a/lib/librte_pdump/meson.build
+++ b/lib/librte_pdump/meson.build
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+version = 2
sources = files('rte_pdump.c')
headers = files('rte_pdump.h')
+allow_experimental_apis = true
deps += ['ethdev']
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index ec8a5d84..6c3a8858 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -1,35 +1,24 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016 Intel Corporation
+ * Copyright(c) 2016-2018 Intel Corporation
*/
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <pthread.h>
-#include <stdbool.h>
-#include <stdio.h>
-
#include <rte_memcpy.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_errno.h>
+#include <rte_string_fns.h>
#include "rte_pdump.h"
-#define SOCKET_PATH_VAR_RUN "/var/run"
-#define SOCKET_PATH_HOME "HOME"
-#define DPDK_DIR "/.dpdk"
-#define SOCKET_DIR "/pdump_sockets"
-#define SERVER_SOCKET "%s/pdump_server_socket"
-#define CLIENT_SOCKET "%s/pdump_client_socket_%d_%u"
#define DEVICE_ID_SIZE 64
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_PDUMP RTE_LOGTYPE_USER1
+/* Used for multi-process communication */
+#define PDUMP_MP "mp_pdump"
+
enum pdump_operation {
DISABLE = 1,
ENABLE = 2
@@ -39,11 +28,6 @@ enum pdump_version {
V1 = 1
};
-static pthread_t pdump_thread;
-static int pdump_socket_fd;
-static char server_socket_dir[PATH_MAX];
-static char client_socket_dir[PATH_MAX];
-
struct pdump_request {
uint16_t ver;
uint16_t op;
@@ -75,7 +59,7 @@ struct pdump_response {
static struct pdump_rxtx_cbs {
struct rte_ring *ring;
struct rte_mempool *mp;
- struct rte_eth_rxtx_callback *cb;
+ const struct rte_eth_rxtx_callback *cb;
void *filter;
} rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
@@ -307,7 +291,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
}
static int
-set_pdump_rxtx_cbs(struct pdump_request *p)
+set_pdump_rxtx_cbs(const struct pdump_request *p)
{
uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue;
uint16_t port;
@@ -391,313 +375,51 @@ set_pdump_rxtx_cbs(struct pdump_request *p)
return ret;
}
-/* get socket path (/var/run if root, $HOME otherwise) */
static int
-pdump_get_socket_path(char *buffer, int bufsz, enum rte_pdump_socktype type)
+pdump_server(const struct rte_mp_msg *mp_msg, const void *peer)
{
- char dpdk_dir[PATH_MAX] = {0};
- char dir[PATH_MAX] = {0};
- char *dir_home = NULL;
- int ret = 0;
-
- if (type == RTE_PDUMP_SOCKET_SERVER && server_socket_dir[0] != 0)
- snprintf(dir, sizeof(dir), "%s", server_socket_dir);
- else if (type == RTE_PDUMP_SOCKET_CLIENT && client_socket_dir[0] != 0)
- snprintf(dir, sizeof(dir), "%s", client_socket_dir);
- else {
- if (getuid() != 0) {
- dir_home = getenv(SOCKET_PATH_HOME);
- if (!dir_home) {
- RTE_LOG(ERR, PDUMP,
- "Failed to get environment variable"
- " value for %s, %s:%d\n",
- SOCKET_PATH_HOME, __func__, __LINE__);
- return -1;
- }
- snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
- dir_home, DPDK_DIR);
- } else
- snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
- SOCKET_PATH_VAR_RUN, DPDK_DIR);
-
- mkdir(dpdk_dir, 0700);
- snprintf(dir, sizeof(dir), "%s%s",
- dpdk_dir, SOCKET_DIR);
- }
-
- ret = mkdir(dir, 0700);
- /* if user passed socket path is invalid, return immediately */
- if (ret < 0 && errno != EEXIST) {
- RTE_LOG(ERR, PDUMP,
- "Failed to create dir:%s:%s\n", dir,
- strerror(errno));
- rte_errno = errno;
- return -1;
- }
-
- if (type == RTE_PDUMP_SOCKET_SERVER)
- snprintf(buffer, bufsz, SERVER_SOCKET, dir);
- else
- snprintf(buffer, bufsz, CLIENT_SOCKET, dir, getpid(),
- rte_sys_gettid());
-
- return 0;
-}
-
-static int
-pdump_create_server_socket(void)
-{
- int ret, socket_fd;
- struct sockaddr_un addr;
- socklen_t addr_len;
-
- ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
- RTE_PDUMP_SOCKET_SERVER);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to get server socket path: %s:%d\n",
- __func__, __LINE__);
- return -1;
- }
- addr.sun_family = AF_UNIX;
-
- /* remove if file already exists */
- unlink(addr.sun_path);
-
- /* set up a server socket */
- socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to create server socket: %s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- return -1;
- }
-
- addr_len = sizeof(struct sockaddr_un);
- ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len);
- if (ret) {
- RTE_LOG(ERR, PDUMP,
- "Failed to bind to server socket: %s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- close(socket_fd);
+ struct rte_mp_msg mp_resp;
+ const struct pdump_request *cli_req;
+ struct pdump_response *resp = (struct pdump_response *)&mp_resp.param;
+
+ /* recv client requests */
+ if (mp_msg->len_param != sizeof(*cli_req)) {
+ RTE_LOG(ERR, PDUMP, "failed to recv from client\n");
+ resp->err_value = -EINVAL;
+ } else {
+ cli_req = (const struct pdump_request *)mp_msg->param;
+ resp->ver = cli_req->ver;
+ resp->res_op = cli_req->op;
+ resp->err_value = set_pdump_rxtx_cbs(cli_req);
+ }
+
+ strlcpy(mp_resp.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN);
+ mp_resp.len_param = sizeof(*resp);
+ mp_resp.num_fds = 0;
+ if (rte_mp_reply(&mp_resp, peer) < 0) {
+ RTE_LOG(ERR, PDUMP, "failed to send to client:%s, %s:%d\n",
+ strerror(rte_errno), __func__, __LINE__);
return -1;
}
- /* save the socket in local configuration */
- pdump_socket_fd = socket_fd;
-
return 0;
}
-static __attribute__((noreturn)) void *
-pdump_thread_main(__rte_unused void *arg)
-{
- struct sockaddr_un cli_addr;
- socklen_t cli_len;
- struct pdump_request cli_req;
- struct pdump_response resp;
- int n;
- int ret = 0;
-
- /* host thread, never break out */
- for (;;) {
- /* recv client requests */
- cli_len = sizeof(cli_addr);
- n = recvfrom(pdump_socket_fd, &cli_req,
- sizeof(struct pdump_request), 0,
- (struct sockaddr *)&cli_addr, &cli_len);
- if (n < 0) {
- RTE_LOG(ERR, PDUMP,
- "failed to recv from client:%s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- continue;
- }
-
- ret = set_pdump_rxtx_cbs(&cli_req);
-
- resp.ver = cli_req.ver;
- resp.res_op = cli_req.op;
- resp.err_value = ret;
- n = sendto(pdump_socket_fd, &resp,
- sizeof(struct pdump_response),
- 0, (struct sockaddr *)&cli_addr, cli_len);
- if (n < 0) {
- RTE_LOG(ERR, PDUMP,
- "failed to send to client:%s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- }
- }
-}
-
int
-rte_pdump_init(const char *path)
+rte_pdump_init(const char *path __rte_unused)
{
- int ret = 0;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
-
- ret = rte_pdump_set_socket_dir(path, RTE_PDUMP_SOCKET_SERVER);
- if (ret != 0)
- return -1;
-
- ret = pdump_create_server_socket();
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to create server socket:%s:%d\n",
- __func__, __LINE__);
- return -1;
- }
-
- /* create the host thread to wait/handle pdump requests */
- ret = pthread_create(&pdump_thread, NULL, pdump_thread_main, NULL);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to create the pdump thread:%s, %s:%d\n",
- strerror(ret), __func__, __LINE__);
- return -1;
- }
- /* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pdump-thread");
- ret = rte_thread_setname(pdump_thread, thread_name);
- if (ret != 0) {
- RTE_LOG(DEBUG, PDUMP,
- "Failed to set thread name for pdump handling\n");
- }
-
- return 0;
+ return rte_mp_action_register(PDUMP_MP, pdump_server);
}
int
rte_pdump_uninit(void)
{
- int ret;
-
- ret = pthread_cancel(pdump_thread);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to cancel the pdump thread:%s, %s:%d\n",
- strerror(ret), __func__, __LINE__);
- return -1;
- }
-
- ret = close(pdump_socket_fd);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to close server socket: %s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- return -1;
- }
-
- struct sockaddr_un addr;
-
- ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
- RTE_PDUMP_SOCKET_SERVER);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to get server socket path: %s:%d\n",
- __func__, __LINE__);
- return -1;
- }
- ret = unlink(addr.sun_path);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to remove server socket addr: %s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- return -1;
- }
+ rte_mp_action_unregister(PDUMP_MP);
return 0;
}
static int
-pdump_create_client_socket(struct pdump_request *p)
-{
- int ret, socket_fd;
- int pid;
- int n;
- struct pdump_response server_resp;
- struct sockaddr_un addr, serv_addr, from;
- socklen_t addr_len, serv_len;
-
- pid = getpid();
-
- socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, PDUMP,
- "client socket(): %s:pid(%d):tid(%u), %s:%d\n",
- strerror(errno), pid, rte_sys_gettid(),
- __func__, __LINE__);
- rte_errno = errno;
- return -1;
- }
-
- ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
- RTE_PDUMP_SOCKET_CLIENT);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to get client socket path: %s:%d\n",
- __func__, __LINE__);
- rte_errno = errno;
- goto exit;
- }
- addr.sun_family = AF_UNIX;
- addr_len = sizeof(struct sockaddr_un);
-
- do {
- ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len);
- if (ret) {
- RTE_LOG(ERR, PDUMP,
- "client bind(): %s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- rte_errno = errno;
- break;
- }
-
- serv_len = sizeof(struct sockaddr_un);
- memset(&serv_addr, 0, sizeof(serv_addr));
- ret = pdump_get_socket_path(serv_addr.sun_path,
- sizeof(serv_addr.sun_path),
- RTE_PDUMP_SOCKET_SERVER);
- if (ret != 0) {
- RTE_LOG(ERR, PDUMP,
- "Failed to get server socket path: %s:%d\n",
- __func__, __LINE__);
- rte_errno = errno;
- break;
- }
- serv_addr.sun_family = AF_UNIX;
-
- n = sendto(socket_fd, p, sizeof(struct pdump_request), 0,
- (struct sockaddr *)&serv_addr, serv_len);
- if (n < 0) {
- RTE_LOG(ERR, PDUMP,
- "failed to send to server:%s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- rte_errno = errno;
- ret = -1;
- break;
- }
-
- n = recvfrom(socket_fd, &server_resp,
- sizeof(struct pdump_response), 0,
- (struct sockaddr *)&from, &serv_len);
- if (n < 0) {
- RTE_LOG(ERR, PDUMP,
- "failed to recv from server:%s, %s:%d\n",
- strerror(errno), __func__, __LINE__);
- rte_errno = errno;
- ret = -1;
- break;
- }
- ret = server_resp.err_value;
- } while (0);
-
-exit:
- close(socket_fd);
- unlink(addr.sun_path);
- return ret;
-}
-
-static int
pdump_validate_ring_mp(struct rte_ring *ring, struct rte_mempool *mp)
{
if (ring == NULL || mp == NULL) {
@@ -768,36 +490,48 @@ pdump_prepare_client_request(char *device, uint16_t queue,
struct rte_mempool *mp,
void *filter)
{
- int ret;
- struct pdump_request req = {.ver = 1,};
-
- req.flags = flags;
- req.op = operation;
+ int ret = -1;
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct pdump_request *req = (struct pdump_request *)mp_req.param;
+ struct pdump_response *resp;
+
+ req->ver = 1;
+ req->flags = flags;
+ req->op = operation;
if ((operation & ENABLE) != 0) {
- snprintf(req.data.en_v1.device, sizeof(req.data.en_v1.device),
- "%s", device);
- req.data.en_v1.queue = queue;
- req.data.en_v1.ring = ring;
- req.data.en_v1.mp = mp;
- req.data.en_v1.filter = filter;
+ snprintf(req->data.en_v1.device,
+ sizeof(req->data.en_v1.device), "%s", device);
+ req->data.en_v1.queue = queue;
+ req->data.en_v1.ring = ring;
+ req->data.en_v1.mp = mp;
+ req->data.en_v1.filter = filter;
} else {
- snprintf(req.data.dis_v1.device, sizeof(req.data.dis_v1.device),
- "%s", device);
- req.data.dis_v1.queue = queue;
- req.data.dis_v1.ring = NULL;
- req.data.dis_v1.mp = NULL;
- req.data.dis_v1.filter = NULL;
+ snprintf(req->data.dis_v1.device,
+ sizeof(req->data.dis_v1.device), "%s", device);
+ req->data.dis_v1.queue = queue;
+ req->data.dis_v1.ring = NULL;
+ req->data.dis_v1.mp = NULL;
+ req->data.dis_v1.filter = NULL;
+ }
+
+ strlcpy(mp_req.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN);
+ mp_req.len_param = sizeof(*req);
+ mp_req.num_fds = 0;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) {
+ mp_rep = &mp_reply.msgs[0];
+ resp = (struct pdump_response *)mp_rep->param;
+ rte_errno = resp->err_value;
+ if (!resp->err_value)
+ ret = 0;
+ free(mp_reply.msgs);
}
- ret = pdump_create_client_socket(&req);
- if (ret < 0) {
+ if (ret < 0)
RTE_LOG(ERR, PDUMP,
"client request for pdump enable/disable failed\n");
- rte_errno = ret;
- return -1;
- }
-
- return 0;
+ return ret;
}
int
@@ -884,30 +618,8 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
}
int
-rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type)
+rte_pdump_set_socket_dir(const char *path __rte_unused,
+ enum rte_pdump_socktype type __rte_unused)
{
- int ret, count;
-
- if (path != NULL) {
- if (type == RTE_PDUMP_SOCKET_SERVER) {
- count = sizeof(server_socket_dir);
- ret = snprintf(server_socket_dir, count, "%s", path);
- } else {
- count = sizeof(client_socket_dir);
- ret = snprintf(client_socket_dir, count, "%s", path);
- }
-
- if (ret < 0 || ret >= count) {
- RTE_LOG(ERR, PDUMP,
- "Invalid socket path:%s:%d\n",
- __func__, __LINE__);
- if (type == RTE_PDUMP_SOCKET_SERVER)
- server_socket_dir[0] = 0;
- else
- client_socket_dir[0] = 0;
- return -EINVAL;
- }
- }
-
return 0;
}
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index a7e83727..673a2b07 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -37,10 +37,10 @@ enum rte_pdump_socktype {
/**
* Initialize packet capturing handling
*
- * Creates pthread and server socket for handling clients
- * requests to enable/disable rxtx callbacks.
+ * Register the IPC action for communication with the target (primary) process.
*
* @param path
+ * This parameter is going to be deprecated; it was used for specifying the
* directory path for server socket.
*
* @return
@@ -52,7 +52,7 @@ rte_pdump_init(const char *path);
/**
 * Uninitialize packet capturing handling
*
- * Cancels pthread, close server socket, removes server socket address.
+ * Unregister the IPC action for communication with the target (primary) process.
*
* @return
* 0 on success, -1 on error
@@ -163,6 +163,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
uint32_t flags);
/**
+ * @deprecated
* Allows applications to set server and client socket paths.
 * If the specified path is NULL, the default path will be selected, i.e.
 * "/var/run/" for the root user and "$HOME" for non-root users.
@@ -181,7 +182,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
* 0 on success, -EINVAL on error
*
*/
-int
+__rte_deprecated int
rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type);
#ifdef __cplusplus
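
The rewritten pdump control path above drops the private UNIX-socket/pthread server in favour of DPDK's generic multi-process (rte_mp) channel: the primary process registers a named action and replies from a callback, while secondary processes issue a synchronous request with a timeout. A minimal sketch of that request/response pattern follows; the action name "demo_mp", the demo_* function names and the payload handling are hypothetical and only mirror the shape of pdump_server() and pdump_prepare_client_request() in the hunks above.

/* Minimal sketch of the rte_mp request/response pattern used above.
 * The action name and payload handling are hypothetical, not part of
 * librte_pdump. */
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>
#include <rte_string_fns.h>

#define DEMO_MP "demo_mp"

/* Primary process: invoked for every request addressed to DEMO_MP. */
static int
demo_server(const struct rte_mp_msg *msg, const void *peer)
{
	struct rte_mp_msg resp;

	memset(&resp, 0, sizeof(resp));
	strlcpy(resp.name, DEMO_MP, RTE_MP_MAX_NAME_LEN);
	resp.len_param = msg->len_param;	/* echo the payload back */
	memcpy(resp.param, msg->param, msg->len_param);
	return rte_mp_reply(&resp, peer);
}

/* Primary process: register the action once, as rte_pdump_init() now does. */
static int
demo_register(void)
{
	return rte_mp_action_register(DEMO_MP, demo_server);
}

/* Secondary process: synchronous request with a timeout, as
 * pdump_prepare_client_request() now does. */
static int
demo_request(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
	int ret = -1;

	memset(&req, 0, sizeof(req));
	strlcpy(req.name, DEMO_MP, RTE_MP_MAX_NAME_LEN);
	req.len_param = 0;	/* a request payload would go in req.param */

	if (rte_mp_request_sync(&req, &reply, &ts) == 0) {
		ret = (reply.nb_received == 1) ? 0 : -1;
		free(reply.msgs);	/* caller owns the reply array */
	}
	return ret;
}
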
diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile
index e94fbc02..84afe98c 100644
--- a/lib/librte_pipeline/Makefile
+++ b/lib/librte_pipeline/Makefile
@@ -8,10 +8,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
#
LIB = librte_pipeline.a
+CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table
-LDLIBS += -lrte_port
+LDLIBS += -lrte_port -lrte_meter -lrte_sched
EXPORT_MAP := rte_pipeline_version.map
@@ -21,8 +22,10 @@ LIBABIVER := 3
# all source are stored in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += rte_port_in_action.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += rte_table_action.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h rte_port_in_action.h rte_table_action.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pipeline/meson.build b/lib/librte_pipeline/meson.build
index a35b6220..dc16ab42 100644
--- a/lib/librte_pipeline/meson.build
+++ b/lib/librte_pipeline/meson.build
@@ -2,6 +2,7 @@
# Copyright(c) 2017 Intel Corporation
version = 3
-sources = files('rte_pipeline.c')
-headers = files('rte_pipeline.h')
-deps += ['port', 'table']
+allow_experimental_apis = true
+sources = files('rte_pipeline.c', 'rte_port_in_action.c', 'rte_table_action.c')
+headers = files('rte_pipeline.h', 'rte_port_in_action.h', 'rte_table_action.h')
+deps += ['port', 'table', 'meter', 'sched']
diff --git a/lib/librte_pipeline/rte_pipeline_version.map b/lib/librte_pipeline/rte_pipeline_version.map
index e4ee154f..d820b22f 100644
--- a/lib/librte_pipeline/rte_pipeline_version.map
+++ b/lib/librte_pipeline/rte_pipeline_version.map
@@ -45,3 +45,31 @@ DPDK_16.04 {
rte_pipeline_ah_packet_drop;
} DPDK_2.2;
+
+EXPERIMENTAL {
+ global:
+
+ rte_port_in_action_apply;
+ rte_port_in_action_create;
+ rte_port_in_action_free;
+ rte_port_in_action_params_get;
+ rte_port_in_action_profile_action_register;
+ rte_port_in_action_profile_create;
+ rte_port_in_action_profile_free;
+ rte_port_in_action_profile_freeze;
+ rte_table_action_apply;
+ rte_table_action_create;
+ rte_table_action_dscp_table_update;
+ rte_table_action_free;
+ rte_table_action_meter_profile_add;
+ rte_table_action_meter_profile_delete;
+ rte_table_action_meter_read;
+ rte_table_action_profile_action_register;
+ rte_table_action_profile_create;
+ rte_table_action_profile_free;
+ rte_table_action_profile_freeze;
+ rte_table_action_table_params_get;
+ rte_table_action_stats_read;
+ rte_table_action_time_read;
+ rte_table_action_ttl_read;
+};
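
All symbols added to the EXPERIMENTAL block above are tagged __rte_experimental, which is why the Makefile and meson.build hunks earlier add -DALLOW_EXPERIMENTAL_API and allow_experimental_apis = true for the library build itself. An application calling these functions has to opt in the same way; a minimal sketch, assuming the standard DPDK experimental-API gating and placing the define before any DPDK header is included:

/* Opt in to the experimental pipeline action APIs for this translation unit.
 * Equivalent to passing -DALLOW_EXPERIMENTAL_API in the application CFLAGS;
 * the define must come before the first DPDK header is included. */
#define ALLOW_EXPERIMENTAL_API 1

#include <rte_port_in_action.h>
#include <rte_table_action.h>
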
diff --git a/lib/librte_pipeline/rte_port_in_action.c b/lib/librte_pipeline/rte_port_in_action.c
new file mode 100644
index 00000000..e3b00df8
--- /dev/null
+++ b/lib/librte_pipeline/rte_port_in_action.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+
+#include "rte_port_in_action.h"
+
+/**
+ * RTE_PORT_IN_ACTION_FLTR
+ */
+static int
+fltr_cfg_check(struct rte_port_in_action_fltr_config *cfg)
+{
+ if (cfg == NULL)
+ return -1;
+
+ return 0;
+}
+
+struct fltr_data {
+ uint32_t port_id;
+};
+
+static void
+fltr_init(struct fltr_data *data,
+ struct rte_port_in_action_fltr_config *cfg)
+{
+ data->port_id = cfg->port_id;
+}
+
+static int
+fltr_apply(struct fltr_data *data,
+ struct rte_port_in_action_fltr_params *p)
+{
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ data->port_id = p->port_id;
+
+ return 0;
+}
+
+/**
+ * RTE_PORT_IN_ACTION_LB
+ */
+static int
+lb_cfg_check(struct rte_port_in_action_lb_config *cfg)
+{
+ if ((cfg == NULL) ||
+ (cfg->key_size < RTE_PORT_IN_ACTION_LB_KEY_SIZE_MIN) ||
+ (cfg->key_size > RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX) ||
+ (!rte_is_power_of_2(cfg->key_size)) ||
+ (cfg->f_hash == NULL))
+ return -1;
+
+ return 0;
+}
+
+struct lb_data {
+ uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE];
+};
+
+static void
+lb_init(struct lb_data *data,
+ struct rte_port_in_action_lb_config *cfg)
+{
+ memcpy(data->port_id, cfg->port_id, sizeof(cfg->port_id));
+}
+
+static int
+lb_apply(struct lb_data *data,
+ struct rte_port_in_action_lb_params *p)
+{
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ memcpy(data->port_id, p->port_id, sizeof(p->port_id));
+
+ return 0;
+}
+
+/**
+ * Action profile
+ */
+static int
+action_valid(enum rte_port_in_action_type action)
+{
+ switch (action) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ case RTE_PORT_IN_ACTION_LB:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+#define RTE_PORT_IN_ACTION_MAX 64
+
+struct ap_config {
+ uint64_t action_mask;
+ struct rte_port_in_action_fltr_config fltr;
+ struct rte_port_in_action_lb_config lb;
+};
+
+static size_t
+action_cfg_size(enum rte_port_in_action_type action)
+{
+ switch (action) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ return sizeof(struct rte_port_in_action_fltr_config);
+ case RTE_PORT_IN_ACTION_LB:
+ return sizeof(struct rte_port_in_action_lb_config);
+ default:
+ return 0;
+ }
+}
+
+static void*
+action_cfg_get(struct ap_config *ap_config,
+ enum rte_port_in_action_type type)
+{
+ switch (type) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ return &ap_config->fltr;
+
+ case RTE_PORT_IN_ACTION_LB:
+ return &ap_config->lb;
+
+ default:
+ return NULL;
+ }
+}
+
+static void
+action_cfg_set(struct ap_config *ap_config,
+ enum rte_port_in_action_type type,
+ void *action_cfg)
+{
+ void *dst = action_cfg_get(ap_config, type);
+
+ if (dst)
+ memcpy(dst, action_cfg, action_cfg_size(type));
+
+ ap_config->action_mask |= 1LLU << type;
+}
+
+struct ap_data {
+ size_t offset[RTE_PORT_IN_ACTION_MAX];
+ size_t total_size;
+};
+
+static size_t
+action_data_size(enum rte_port_in_action_type action,
+ struct ap_config *ap_config __rte_unused)
+{
+ switch (action) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ return sizeof(struct fltr_data);
+
+ case RTE_PORT_IN_ACTION_LB:
+ return sizeof(struct lb_data);
+
+ default:
+ return 0;
+ }
+}
+
+static void
+action_data_offset_set(struct ap_data *ap_data,
+ struct ap_config *ap_config)
+{
+ uint64_t action_mask = ap_config->action_mask;
+ size_t offset;
+ uint32_t action;
+
+ memset(ap_data->offset, 0, sizeof(ap_data->offset));
+
+ offset = 0;
+ for (action = 0; action < RTE_PORT_IN_ACTION_MAX; action++)
+ if (action_mask & (1LLU << action)) {
+ ap_data->offset[action] = offset;
+ offset += action_data_size((enum rte_port_in_action_type)action,
+ ap_config);
+ }
+
+ ap_data->total_size = offset;
+}
+
+struct rte_port_in_action_profile {
+ struct ap_config cfg;
+ struct ap_data data;
+ int frozen;
+};
+
+struct rte_port_in_action_profile *
+rte_port_in_action_profile_create(uint32_t socket_id)
+{
+ struct rte_port_in_action_profile *ap;
+
+ /* Memory allocation */
+ ap = rte_zmalloc_socket(NULL,
+ sizeof(struct rte_port_in_action_profile),
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (ap == NULL)
+ return NULL;
+
+ return ap;
+}
+
+int
+rte_port_in_action_profile_action_register(struct rte_port_in_action_profile *profile,
+ enum rte_port_in_action_type type,
+ void *action_config)
+{
+ int status;
+
+ /* Check input arguments */
+ if ((profile == NULL) ||
+ profile->frozen ||
+ (action_valid(type) == 0) ||
+ (profile->cfg.action_mask & (1LLU << type)) ||
+ ((action_cfg_size(type) == 0) && action_config) ||
+ (action_cfg_size(type) && (action_config == NULL)))
+ return -EINVAL;
+
+ switch (type) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ status = fltr_cfg_check(action_config);
+ break;
+
+ case RTE_PORT_IN_ACTION_LB:
+ status = lb_cfg_check(action_config);
+ break;
+
+ default:
+ status = 0;
+ break;
+ }
+
+ if (status)
+ return status;
+
+ /* Action enable */
+ action_cfg_set(&profile->cfg, type, action_config);
+
+ return 0;
+}
+
+int
+rte_port_in_action_profile_freeze(struct rte_port_in_action_profile *profile)
+{
+ if (profile->frozen)
+ return -EBUSY;
+
+ action_data_offset_set(&profile->data, &profile->cfg);
+ profile->frozen = 1;
+
+ return 0;
+}
+
+int
+rte_port_in_action_profile_free(struct rte_port_in_action_profile *profile)
+{
+ if (profile == NULL)
+ return 0;
+
+ free(profile);
+ return 0;
+}
+
+/**
+ * Action
+ */
+struct rte_port_in_action {
+ struct ap_config cfg;
+ struct ap_data data;
+ uint8_t memory[0] __rte_cache_aligned;
+};
+
+static __rte_always_inline void *
+action_data_get(struct rte_port_in_action *action,
+ enum rte_port_in_action_type type)
+{
+ size_t offset = action->data.offset[type];
+
+ return &action->memory[offset];
+}
+
+static void
+action_data_init(struct rte_port_in_action *action,
+ enum rte_port_in_action_type type)
+{
+ void *data = action_data_get(action, type);
+
+ switch (type) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ fltr_init(data, &action->cfg.fltr);
+ return;
+
+ case RTE_PORT_IN_ACTION_LB:
+ lb_init(data, &action->cfg.lb);
+ return;
+
+ default:
+ return;
+ }
+}
+
+struct rte_port_in_action *
+rte_port_in_action_create(struct rte_port_in_action_profile *profile,
+ uint32_t socket_id)
+{
+ struct rte_port_in_action *action;
+ size_t size;
+ uint32_t i;
+
+ /* Check input arguments */
+ if ((profile == NULL) ||
+ (profile->frozen == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = sizeof(struct rte_port_in_action) + profile->data.total_size;
+ size = RTE_CACHE_LINE_ROUNDUP(size);
+
+ action = rte_zmalloc_socket(NULL,
+ size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (action == NULL)
+ return NULL;
+
+ /* Initialization */
+ memcpy(&action->cfg, &profile->cfg, sizeof(profile->cfg));
+ memcpy(&action->data, &profile->data, sizeof(profile->data));
+
+ for (i = 0; i < RTE_PORT_IN_ACTION_MAX; i++)
+ if (action->cfg.action_mask & (1LLU << i))
+ action_data_init(action,
+ (enum rte_port_in_action_type)i);
+
+ return action;
+}
+
+int
+rte_port_in_action_apply(struct rte_port_in_action *action,
+ enum rte_port_in_action_type type,
+ void *action_params)
+{
+ void *action_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ (action_valid(type) == 0) ||
+ ((action->cfg.action_mask & (1LLU << type)) == 0) ||
+ (action_params == NULL))
+ return -EINVAL;
+
+ /* Data update */
+ action_data = action_data_get(action, type);
+
+ switch (type) {
+ case RTE_PORT_IN_ACTION_FLTR:
+ return fltr_apply(action_data,
+ action_params);
+
+ case RTE_PORT_IN_ACTION_LB:
+ return lb_apply(action_data,
+ action_params);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+ah_filter_on_match(struct rte_pipeline *p,
+ struct rte_mbuf **pkts,
+ uint32_t n_pkts,
+ void *arg)
+{
+ struct rte_port_in_action *action = arg;
+ struct rte_port_in_action_fltr_config *cfg = &action->cfg.fltr;
+ uint64_t *key_mask = (uint64_t *) cfg->key_mask;
+ uint64_t *key = (uint64_t *) cfg->key;
+ uint32_t key_offset = cfg->key_offset;
+ struct fltr_data *data = action_data_get(action,
+ RTE_PORT_IN_ACTION_FLTR);
+ uint32_t i;
+
+ for (i = 0; i < n_pkts; i++) {
+ struct rte_mbuf *pkt = pkts[i];
+ uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(pkt,
+ key_offset);
+
+ uint64_t xor0 = (pkt_key[0] & key_mask[0]) ^ key[0];
+ uint64_t xor1 = (pkt_key[1] & key_mask[1]) ^ key[1];
+ uint64_t or = xor0 | xor1;
+
+ if (or == 0) {
+ rte_pipeline_ah_packet_hijack(p, 1LLU << i);
+ rte_pipeline_port_out_packet_insert(p,
+ data->port_id, pkt);
+ }
+ }
+
+ return 0;
+}
+
+static int
+ah_filter_on_mismatch(struct rte_pipeline *p,
+ struct rte_mbuf **pkts,
+ uint32_t n_pkts,
+ void *arg)
+{
+ struct rte_port_in_action *action = arg;
+ struct rte_port_in_action_fltr_config *cfg = &action->cfg.fltr;
+ uint64_t *key_mask = (uint64_t *) cfg->key_mask;
+ uint64_t *key = (uint64_t *) cfg->key;
+ uint32_t key_offset = cfg->key_offset;
+ struct fltr_data *data = action_data_get(action,
+ RTE_PORT_IN_ACTION_FLTR);
+ uint32_t i;
+
+ for (i = 0; i < n_pkts; i++) {
+ struct rte_mbuf *pkt = pkts[i];
+ uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(pkt,
+ key_offset);
+
+ uint64_t xor0 = (pkt_key[0] & key_mask[0]) ^ key[0];
+ uint64_t xor1 = (pkt_key[1] & key_mask[1]) ^ key[1];
+ uint64_t or = xor0 | xor1;
+
+ if (or) {
+ rte_pipeline_ah_packet_hijack(p, 1LLU << i);
+ rte_pipeline_port_out_packet_insert(p,
+ data->port_id, pkt);
+ }
+ }
+
+ return 0;
+}
+
+static int
+ah_lb(struct rte_pipeline *p,
+ struct rte_mbuf **pkts,
+ uint32_t n_pkts,
+ void *arg)
+{
+ struct rte_port_in_action *action = arg;
+ struct rte_port_in_action_lb_config *cfg = &action->cfg.lb;
+ struct lb_data *data = action_data_get(action, RTE_PORT_IN_ACTION_LB);
+ uint64_t pkt_mask = RTE_LEN2MASK(n_pkts, uint64_t);
+ uint32_t i;
+
+ rte_pipeline_ah_packet_hijack(p, pkt_mask);
+
+ for (i = 0; i < n_pkts; i++) {
+ struct rte_mbuf *pkt = pkts[i];
+ uint8_t *pkt_key = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+ cfg->key_offset);
+
+ uint64_t digest = cfg->f_hash(pkt_key,
+ cfg->key_mask,
+ cfg->key_size,
+ cfg->seed);
+ uint64_t pos = digest & (RTE_PORT_IN_ACTION_LB_TABLE_SIZE - 1);
+ uint32_t port_id = data->port_id[pos];
+
+ rte_pipeline_port_out_packet_insert(p, port_id, pkt);
+ }
+
+ return 0;
+}
+
+static rte_pipeline_port_in_action_handler
+ah_selector(struct rte_port_in_action *action)
+{
+ if (action->cfg.action_mask == 0)
+ return NULL;
+
+ if (action->cfg.action_mask == 1LLU << RTE_PORT_IN_ACTION_FLTR)
+ return (action->cfg.fltr.filter_on_match) ?
+ ah_filter_on_match : ah_filter_on_mismatch;
+
+ if (action->cfg.action_mask == 1LLU << RTE_PORT_IN_ACTION_LB)
+ return ah_lb;
+
+ return NULL;
+}
+
+int
+rte_port_in_action_params_get(struct rte_port_in_action *action,
+ struct rte_pipeline_port_in_params *params)
+{
+ rte_pipeline_port_in_action_handler f_action;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ (params == NULL))
+ return -EINVAL;
+
+ f_action = ah_selector(action);
+
+ /* Fill in params */
+ params->f_action = f_action;
+ params->arg_ah = (f_action) ? action : NULL;
+
+ return 0;
+}
+
+int
+rte_port_in_action_free(struct rte_port_in_action *action)
+{
+ if (action == NULL)
+ return 0;
+
+ rte_free(action);
+
+ return 0;
+}
diff --git a/lib/librte_pipeline/rte_port_in_action.h b/lib/librte_pipeline/rte_port_in_action.h
new file mode 100644
index 00000000..0a85e4e0
--- /dev/null
+++ b/lib/librte_pipeline/rte_port_in_action.h
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_PORT_IN_ACTION_H__
+#define __INCLUDE_RTE_PORT_IN_ACTION_H__
+
+/**
+ * @file
+ * RTE Pipeline Input Port Actions
+ *
+ * This API provides a common set of actions for pipeline input ports to speed
+ * up application development.
+ *
+ * Each pipeline input port can be assigned an action handler to be executed
+ * on every input packet during the pipeline execution. The pipeline library
+ * allows the user to define their own input port actions by providing a custom
+ * input port action handler. While the user can still follow this process, this
+ * API is intended to provide a quicker development alternative for a set of
+ * predefined actions.
+ *
+ * The typical steps to use this API are:
+ * - Define an input port action profile. This is a configuration template that
+ * can potentially be shared by multiple input ports from the same or
+ * different pipelines, with different input ports from the same pipeline
+ * able to use different action profiles. For every input port using a given
+ * action profile, the profile defines the set of actions and the action
+ * configuration to be executed by the input port. API functions:
+ * rte_port_in_action_profile_create(),
+ * rte_port_in_action_profile_action_register(),
+ * rte_port_in_action_profile_freeze().
+ *
+ * - Instantiate the input port action profile to create input port action
+ * objects. Each pipeline input port has its own action object.
+ * API functions: rte_port_in_action_create().
+ *
+ * - Use the input port action object to generate the input port action handler
+ * invoked by the pipeline. API functions:
+ * rte_port_in_action_params_get().
+ *
+ * - Use the input port action object to generate the internal data structures
+ * used by the input port action handler based on given action parameters.
+ * API functions: rte_port_in_action_apply().
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+#include <rte_table_hash.h>
+
+#include "rte_pipeline.h"
+
+/** Input port actions. */
+enum rte_port_in_action_type {
+ /** Filter selected input packets. */
+ RTE_PORT_IN_ACTION_FLTR = 0,
+
+ /** Load balance. */
+ RTE_PORT_IN_ACTION_LB,
+};
+
+/**
+ * RTE_PORT_IN_ACTION_FLTR
+ */
+/** Filter key size (number of bytes) */
+#define RTE_PORT_IN_ACTION_FLTR_KEY_SIZE 16
+
+/** Filter action configuration (per action profile). */
+struct rte_port_in_action_fltr_config {
+ /** Key offset within the input packet buffer. Offset 0 points to the
+ * first byte of the MBUF structure.
+ */
+ uint32_t key_offset;
+
+ /** Key mask. */
+ uint8_t key_mask[RTE_PORT_IN_ACTION_FLTR_KEY_SIZE];
+
+ /** Key value. */
+ uint8_t key[RTE_PORT_IN_ACTION_FLTR_KEY_SIZE];
+
+ /** When non-zero, all the input packets that match the *key* (with the
+ * *key_mask* applied) are sent to the pipeline output port *port_id*.
+ * When zero, all the input packets that do NOT match the *key* (with
+ * *key_mask* applied) are sent to the pipeline output port *port_id*.
+ */
+ int filter_on_match;
+
+ /** Pipeline output port ID to send the filtered input packets to.
+ * Can be updated later.
+ *
+ * @see struct rte_port_in_action_fltr_params
+ */
+ uint32_t port_id;
+};
+
+/** Filter action parameters (per action). */
+struct rte_port_in_action_fltr_params {
+ /** Pipeline output port ID to send the filtered input packets to. */
+ uint32_t port_id;
+};
+
+/**
+ * RTE_PORT_IN_ACTION_LB
+ */
+/** Load balance key size min (number of bytes). */
+#define RTE_PORT_IN_ACTION_LB_KEY_SIZE_MIN 8
+
+/** Load balance key size max (number of bytes). */
+#define RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX 64
+
+/** Load balance table size. */
+#define RTE_PORT_IN_ACTION_LB_TABLE_SIZE 16
+
+/** Load balance action configuration (per action profile). */
+struct rte_port_in_action_lb_config {
+ /** Key size (number of bytes). */
+ uint32_t key_size;
+
+ /** Key offset within the input packet buffer. Offset 0 points to the
+ * first byte of the MBUF structure.
+ */
+ uint32_t key_offset;
+
+ /** Key mask (*key_size* bytes are valid). */
+ uint8_t key_mask[RTE_PORT_IN_ACTION_LB_KEY_SIZE_MAX];
+
+ /** Hash function. */
+ rte_table_hash_op_hash f_hash;
+
+ /** Seed value for *f_hash*. */
+ uint64_t seed;
+
+ /** Table defining the weight of each pipeline output port. The weights
+ * are set in 1/RTE_PORT_IN_ACTION_LB_TABLE_SIZE increments. To assign a
+ * weight of N/RTE_PORT_IN_ACTION_LB_TABLE_SIZE to a given output port
+ * (0 <= N <= RTE_PORT_IN_ACTION_LB_TABLE_SIZE), the output port needs
+ * to show up exactly N times in this table. Can be updated later.
+ *
+ * @see struct rte_port_in_action_lb_params
+ */
+ uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE];
+};
+
+/** Load balance action parameters (per action). */
+struct rte_port_in_action_lb_params {
+ /** Table defining the weight of each pipeline output port. The weights
+ * are set in 1/RTE_PORT_IN_ACTION_LB_TABLE_SIZE increments. To assign a
+ * weight of N/RTE_PORT_IN_ACTION_LB_TABLE_SIZE to a given output port
+ * (0 <= N <= RTE_PORT_IN_ACTION_LB_TABLE_SIZE), the output port needs
+ * to show up exactly N times in this table.
+ */
+ uint32_t port_id[RTE_PORT_IN_ACTION_LB_TABLE_SIZE];
+};
+
+/**
+ * Input port action profile.
+ */
+struct rte_port_in_action_profile;
+
+/**
+ * Input port action profile create.
+ *
+ * @param[in] socket_id
+ * CPU socket ID for the internal data structures memory allocation.
+ * @return
+ * Input port action profile handle on success, NULL otherwise.
+ */
+struct rte_port_in_action_profile * __rte_experimental
+rte_port_in_action_profile_create(uint32_t socket_id);
+
+/**
+ * Input port action profile free.
+ *
+ * @param[in] profile
+ * Input port action profile handle (needs to be valid).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_port_in_action_profile_free(struct rte_port_in_action_profile *profile);
+
+/**
+ * Input port action profile action register.
+ *
+ * @param[in] profile
+ * Input port action profile handle (needs to be valid and not in frozen
+ * state).
+ * @param[in] type
+ * Specific input port action to be registered for *profile*.
+ * @param[in] action_config
+ * Configuration for the *type* action.
+ * If struct rte_port_in_action_*type*_config is defined, it needs to point to
+ * a valid instance of this structure, otherwise it needs to be set to NULL.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_port_in_action_profile_action_register(
+ struct rte_port_in_action_profile *profile,
+ enum rte_port_in_action_type type,
+ void *action_config);
+
+/**
+ * Input port action profile freeze.
+ *
+ * Once this function is called successfully, the given profile enters the
+ * frozen state with the following immediate effects: no more actions can be
+ * registered for this profile, and the profile can now be instantiated to
+ * create input port action objects.
+ *
+ * @param[in] profile
+ * Input port action profile handle (needs to be valid and not in frozen
+ * state).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ *
+ * @see rte_port_in_action_create()
+ */
+int __rte_experimental
+rte_port_in_action_profile_freeze(struct rte_port_in_action_profile *profile);
+
+/**
+ * Input port action.
+ */
+struct rte_port_in_action;
+
+/**
+ * Input port action create.
+ *
+ * Instantiates the given input port action profile to create an input port
+ * action object.
+ *
+ * @param[in] profile
+ * Input port action profile handle (needs to be valid and in frozen state).
+ * @param[in] socket_id
+ * CPU socket ID where the internal data structures required by the new input
+ * port action object should be allocated.
+ * @return
+ * Handle to input port action object on success, NULL on error.
+ */
+struct rte_port_in_action * __rte_experimental
+rte_port_in_action_create(struct rte_port_in_action_profile *profile,
+ uint32_t socket_id);
+
+/**
+ * Input port action free.
+ *
+ * @param[in] action
+ * Handle to input port action object (needs to be valid).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_port_in_action_free(struct rte_port_in_action *action);
+
+/**
+ * Input port params get.
+ *
+ * @param[in] action
+ * Handle to input port action object (needs to be valid).
+ * @param[inout] params
+ * Pipeline input port parameters (needs to be pre-allocated).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_port_in_action_params_get(struct rte_port_in_action *action,
+ struct rte_pipeline_port_in_params *params);
+
+/**
+ * Input port action apply.
+ *
+ * @param[in] action
+ * Handle to input port action object (needs to be valid).
+ * @param[in] type
+ * Specific input port action previously registered for the input port action
+ * profile of the *action* object.
+ * @param[in] action_params
+ * Parameters for the *type* action.
+ * If struct rte_port_in_action_*type*_params is defined, it needs to point to
+ * a valid instance of this structure, otherwise it needs to be set to NULL.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_port_in_action_apply(struct rte_port_in_action *action,
+ enum rte_port_in_action_type type,
+ void *action_params);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_PORT_IN_ACTION_H__ */
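
To tie the new header together, here is a minimal usage sketch of the steps its file-level comment describes (profile create / action register / freeze, then action create / params_get), using the FLTR action. The key offset, the drop port ID and the surrounding pipeline setup are assumptions for illustration, not taken from the patch.

/* Hypothetical sketch: build a filter action profile, instantiate it and
 * wire it into a pipeline input port. Error handling is abbreviated. */
#include <string.h>

#include <rte_pipeline.h>
#include <rte_port_in_action.h>

static int
port_in_filter_setup(struct rte_pipeline_port_in_params *port_params,
	uint32_t socket_id, uint32_t drop_port_id)
{
	struct rte_port_in_action_fltr_config fltr = {
		.key_offset = 128,	/* assumed: key stored in mbuf metadata */
		.filter_on_match = 1,	/* send matching packets to drop_port_id */
		.port_id = drop_port_id,
	};
	struct rte_port_in_action_profile *profile;
	struct rte_port_in_action *action;

	/* Match packets whose masked 16-byte key is all-zero. */
	memset(fltr.key_mask, 0xFF, sizeof(fltr.key_mask));
	memset(fltr.key, 0, sizeof(fltr.key));

	/* 1. Profile: create, register the FLTR action, freeze. */
	profile = rte_port_in_action_profile_create(socket_id);
	if (profile == NULL)
		return -1;
	if (rte_port_in_action_profile_action_register(profile,
			RTE_PORT_IN_ACTION_FLTR, &fltr) ||
			rte_port_in_action_profile_freeze(profile))
		return -1;

	/* 2. Instantiate the profile into a per-port action object. */
	action = rte_port_in_action_create(profile, socket_id);
	if (action == NULL)
		return -1;

	/* 3. Generate the handler and argument for the pipeline input port. */
	return rte_port_in_action_params_get(action, port_params);
}

The LB action follows the same flow; its port_id[] table encodes weights in 1/RTE_PORT_IN_ACTION_LB_TABLE_SIZE steps, so with the default table size of 16, listing output port 0 in twelve entries and port 1 in four yields a 3:1 split.
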
diff --git a/lib/librte_pipeline/rte_table_action.c b/lib/librte_pipeline/rte_table_action.c
new file mode 100644
index 00000000..83ffa5de
--- /dev/null
+++ b/lib/librte_pipeline/rte_table_action.c
@@ -0,0 +1,2386 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_esp.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#include "rte_table_action.h"
+
+#define rte_htons rte_cpu_to_be_16
+#define rte_htonl rte_cpu_to_be_32
+
+#define rte_ntohs rte_be_to_cpu_16
+#define rte_ntohl rte_be_to_cpu_32
+
+/**
+ * RTE_TABLE_ACTION_FWD
+ */
+#define fwd_data rte_pipeline_table_entry
+
+static int
+fwd_apply(struct fwd_data *data,
+ struct rte_table_action_fwd_params *p)
+{
+ data->action = p->action;
+
+ if (p->action == RTE_PIPELINE_ACTION_PORT)
+ data->port_id = p->id;
+
+ if (p->action == RTE_PIPELINE_ACTION_TABLE)
+ data->table_id = p->id;
+
+ return 0;
+}
+
+/**
+ * RTE_TABLE_ACTION_LB
+ */
+static int
+lb_cfg_check(struct rte_table_action_lb_config *cfg)
+{
+ if ((cfg == NULL) ||
+ (cfg->key_size < RTE_TABLE_ACTION_LB_KEY_SIZE_MIN) ||
+ (cfg->key_size > RTE_TABLE_ACTION_LB_KEY_SIZE_MAX) ||
+ (!rte_is_power_of_2(cfg->key_size)) ||
+ (cfg->f_hash == NULL))
+ return -1;
+
+ return 0;
+}
+
+struct lb_data {
+ uint32_t out[RTE_TABLE_ACTION_LB_TABLE_SIZE];
+} __attribute__((__packed__));
+
+static int
+lb_apply(struct lb_data *data,
+ struct rte_table_action_lb_params *p)
+{
+ memcpy(data->out, p->out, sizeof(data->out));
+
+ return 0;
+}
+
+static __rte_always_inline void
+pkt_work_lb(struct rte_mbuf *mbuf,
+ struct lb_data *data,
+ struct rte_table_action_lb_config *cfg)
+{
+ uint8_t *pkt_key = RTE_MBUF_METADATA_UINT8_PTR(mbuf, cfg->key_offset);
+ uint32_t *out = RTE_MBUF_METADATA_UINT32_PTR(mbuf, cfg->out_offset);
+ uint64_t digest, pos;
+ uint32_t out_val;
+
+ digest = cfg->f_hash(pkt_key,
+ cfg->key_mask,
+ cfg->key_size,
+ cfg->seed);
+ pos = digest & (RTE_TABLE_ACTION_LB_TABLE_SIZE - 1);
+ out_val = data->out[pos];
+
+ *out = out_val;
+}
+
+/**
+ * RTE_TABLE_ACTION_MTR
+ */
+static int
+mtr_cfg_check(struct rte_table_action_mtr_config *mtr)
+{
+ if ((mtr->alg == RTE_TABLE_ACTION_METER_SRTCM) ||
+ ((mtr->n_tc != 1) && (mtr->n_tc != 4)) ||
+ (mtr->n_bytes_enabled != 0))
+ return -ENOTSUP;
+ return 0;
+}
+
+#define MBUF_SCHED_QUEUE_TC_COLOR(queue, tc, color) \
+ ((uint16_t)((((uint64_t)(queue)) & 0x3) | \
+ ((((uint64_t)(tc)) & 0x3) << 2) | \
+ ((((uint64_t)(color)) & 0x3) << 4)))
+
+#define MBUF_SCHED_COLOR(sched, color) \
+ (((sched) & (~0x30LLU)) | ((color) << 4))
+
+struct mtr_trtcm_data {
+ struct rte_meter_trtcm trtcm;
+ uint64_t stats[e_RTE_METER_COLORS];
+} __attribute__((__packed__));
+
+#define MTR_TRTCM_DATA_METER_PROFILE_ID_GET(data) \
+ (((data)->stats[e_RTE_METER_GREEN] & 0xF8LLU) >> 3)
+
+static void
+mtr_trtcm_data_meter_profile_id_set(struct mtr_trtcm_data *data,
+ uint32_t profile_id)
+{
+ data->stats[e_RTE_METER_GREEN] &= ~0xF8LLU;
+ data->stats[e_RTE_METER_GREEN] |= (profile_id % 32) << 3;
+}
+
+#define MTR_TRTCM_DATA_POLICER_ACTION_DROP_GET(data, color)\
+ (((data)->stats[(color)] & 4LLU) >> 2)
+
+#define MTR_TRTCM_DATA_POLICER_ACTION_COLOR_GET(data, color)\
+ ((enum rte_meter_color)((data)->stats[(color)] & 3LLU))
+
+static void
+mtr_trtcm_data_policer_action_set(struct mtr_trtcm_data *data,
+ enum rte_meter_color color,
+ enum rte_table_action_policer action)
+{
+ if (action == RTE_TABLE_ACTION_POLICER_DROP) {
+ data->stats[color] |= 4LLU;
+ } else {
+ data->stats[color] &= ~7LLU;
+ data->stats[color] |= color & 3LLU;
+ }
+}
+
+static uint64_t
+mtr_trtcm_data_stats_get(struct mtr_trtcm_data *data,
+ enum rte_meter_color color)
+{
+ return data->stats[color] >> 8;
+}
+
+static void
+mtr_trtcm_data_stats_reset(struct mtr_trtcm_data *data,
+ enum rte_meter_color color)
+{
+ data->stats[color] &= 0xFFLU;
+}
+
+#define MTR_TRTCM_DATA_STATS_INC(data, color) \
+ ((data)->stats[(color)] += (1LLU << 8))
+
+static size_t
+mtr_data_size(struct rte_table_action_mtr_config *mtr)
+{
+ return mtr->n_tc * sizeof(struct mtr_trtcm_data);
+}
+
+struct dscp_table_entry_data {
+ enum rte_meter_color color;
+ uint16_t tc;
+ uint16_t queue_tc_color;
+};
+
+struct dscp_table_data {
+ struct dscp_table_entry_data entry[64];
+};
+
+struct meter_profile_data {
+ struct rte_meter_trtcm_profile profile;
+ uint32_t profile_id;
+ int valid;
+};
+
+static struct meter_profile_data *
+meter_profile_data_find(struct meter_profile_data *mp,
+ uint32_t mp_size,
+ uint32_t profile_id)
+{
+ uint32_t i;
+
+ for (i = 0; i < mp_size; i++) {
+ struct meter_profile_data *mp_data = &mp[i];
+
+ if (mp_data->valid && (mp_data->profile_id == profile_id))
+ return mp_data;
+ }
+
+ return NULL;
+}
+
+static struct meter_profile_data *
+meter_profile_data_find_unused(struct meter_profile_data *mp,
+ uint32_t mp_size)
+{
+ uint32_t i;
+
+ for (i = 0; i < mp_size; i++) {
+ struct meter_profile_data *mp_data = &mp[i];
+
+ if (!mp_data->valid)
+ return mp_data;
+ }
+
+ return NULL;
+}
+
+static int
+mtr_apply_check(struct rte_table_action_mtr_params *p,
+ struct rte_table_action_mtr_config *cfg,
+ struct meter_profile_data *mp,
+ uint32_t mp_size)
+{
+ uint32_t i;
+
+ if (p->tc_mask > RTE_LEN2MASK(cfg->n_tc, uint32_t))
+ return -EINVAL;
+
+ for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) {
+ struct rte_table_action_mtr_tc_params *p_tc = &p->mtr[i];
+ struct meter_profile_data *mp_data;
+
+ if ((p->tc_mask & (1LLU << i)) == 0)
+ continue;
+
+ mp_data = meter_profile_data_find(mp,
+ mp_size,
+ p_tc->meter_profile_id);
+ if (!mp_data)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+mtr_apply(struct mtr_trtcm_data *data,
+ struct rte_table_action_mtr_params *p,
+ struct rte_table_action_mtr_config *cfg,
+ struct meter_profile_data *mp,
+ uint32_t mp_size)
+{
+ uint32_t i;
+ int status;
+
+ /* Check input arguments */
+ status = mtr_apply_check(p, cfg, mp, mp_size);
+ if (status)
+ return status;
+
+ /* Apply */
+ for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) {
+ struct rte_table_action_mtr_tc_params *p_tc = &p->mtr[i];
+ struct mtr_trtcm_data *data_tc = &data[i];
+ struct meter_profile_data *mp_data;
+
+ if ((p->tc_mask & (1LLU << i)) == 0)
+ continue;
+
+ /* Find profile */
+ mp_data = meter_profile_data_find(mp,
+ mp_size,
+ p_tc->meter_profile_id);
+ if (!mp_data)
+ return -EINVAL;
+
+ memset(data_tc, 0, sizeof(*data_tc));
+
+ /* Meter object */
+ status = rte_meter_trtcm_config(&data_tc->trtcm,
+ &mp_data->profile);
+ if (status)
+ return status;
+
+ /* Meter profile */
+ mtr_trtcm_data_meter_profile_id_set(data_tc,
+ mp_data - mp);
+
+ /* Policer actions */
+ mtr_trtcm_data_policer_action_set(data_tc,
+ e_RTE_METER_GREEN,
+ p_tc->policer[e_RTE_METER_GREEN]);
+
+ mtr_trtcm_data_policer_action_set(data_tc,
+ e_RTE_METER_YELLOW,
+ p_tc->policer[e_RTE_METER_YELLOW]);
+
+ mtr_trtcm_data_policer_action_set(data_tc,
+ e_RTE_METER_RED,
+ p_tc->policer[e_RTE_METER_RED]);
+ }
+
+ return 0;
+}
+
+static __rte_always_inline uint64_t
+pkt_work_mtr(struct rte_mbuf *mbuf,
+ struct mtr_trtcm_data *data,
+ struct dscp_table_data *dscp_table,
+ struct meter_profile_data *mp,
+ uint64_t time,
+ uint32_t dscp,
+ uint16_t total_length)
+{
+ uint64_t drop_mask, sched;
+ uint64_t *sched_ptr = (uint64_t *) &mbuf->hash.sched;
+ struct dscp_table_entry_data *dscp_entry = &dscp_table->entry[dscp];
+ enum rte_meter_color color_in, color_meter, color_policer;
+ uint32_t tc, mp_id;
+
+ tc = dscp_entry->tc;
+ color_in = dscp_entry->color;
+ data += tc;
+ mp_id = MTR_TRTCM_DATA_METER_PROFILE_ID_GET(data);
+ sched = *sched_ptr;
+
+ /* Meter */
+ color_meter = rte_meter_trtcm_color_aware_check(
+ &data->trtcm,
+ &mp[mp_id].profile,
+ time,
+ total_length,
+ color_in);
+
+ /* Stats */
+ MTR_TRTCM_DATA_STATS_INC(data, color_meter);
+
+ /* Police */
+ drop_mask = MTR_TRTCM_DATA_POLICER_ACTION_DROP_GET(data, color_meter);
+ color_policer =
+ MTR_TRTCM_DATA_POLICER_ACTION_COLOR_GET(data, color_meter);
+ *sched_ptr = MBUF_SCHED_COLOR(sched, color_policer);
+
+ return drop_mask;
+}
+
+/**
+ * RTE_TABLE_ACTION_TM
+ */
+static int
+tm_cfg_check(struct rte_table_action_tm_config *tm)
+{
+ if ((tm->n_subports_per_port == 0) ||
+ (rte_is_power_of_2(tm->n_subports_per_port) == 0) ||
+ (tm->n_subports_per_port > UINT16_MAX) ||
+ (tm->n_pipes_per_subport == 0) ||
+ (rte_is_power_of_2(tm->n_pipes_per_subport) == 0))
+ return -ENOTSUP;
+
+ return 0;
+}
+
+struct tm_data {
+ uint16_t queue_tc_color;
+ uint16_t subport;
+ uint32_t pipe;
+} __attribute__((__packed__));
+
+static int
+tm_apply_check(struct rte_table_action_tm_params *p,
+ struct rte_table_action_tm_config *cfg)
+{
+ if ((p->subport_id >= cfg->n_subports_per_port) ||
+ (p->pipe_id >= cfg->n_pipes_per_subport))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+tm_apply(struct tm_data *data,
+ struct rte_table_action_tm_params *p,
+ struct rte_table_action_tm_config *cfg)
+{
+ int status;
+
+ /* Check input arguments */
+ status = tm_apply_check(p, cfg);
+ if (status)
+ return status;
+
+ /* Apply */
+ data->queue_tc_color = 0;
+ data->subport = (uint16_t) p->subport_id;
+ data->pipe = p->pipe_id;
+
+ return 0;
+}
+
+static __rte_always_inline void
+pkt_work_tm(struct rte_mbuf *mbuf,
+ struct tm_data *data,
+ struct dscp_table_data *dscp_table,
+ uint32_t dscp)
+{
+ struct dscp_table_entry_data *dscp_entry = &dscp_table->entry[dscp];
+ struct tm_data *sched_ptr = (struct tm_data *) &mbuf->hash.sched;
+ struct tm_data sched;
+
+ sched = *data;
+ sched.queue_tc_color = dscp_entry->queue_tc_color;
+ *sched_ptr = sched;
+}
+
+/**
+ * RTE_TABLE_ACTION_ENCAP
+ */
+static int
+encap_valid(enum rte_table_action_encap_type encap)
+{
+ switch (encap) {
+ case RTE_TABLE_ACTION_ENCAP_ETHER:
+ case RTE_TABLE_ACTION_ENCAP_VLAN:
+ case RTE_TABLE_ACTION_ENCAP_QINQ:
+ case RTE_TABLE_ACTION_ENCAP_MPLS:
+ case RTE_TABLE_ACTION_ENCAP_PPPOE:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int
+encap_cfg_check(struct rte_table_action_encap_config *encap)
+{
+ if ((encap->encap_mask == 0) ||
+ (__builtin_popcountll(encap->encap_mask) != 1))
+ return -ENOTSUP;
+
+ return 0;
+}
+
+struct encap_ether_data {
+ struct ether_hdr ether;
+} __attribute__((__packed__));
+
+#define VLAN(pcp, dei, vid) \
+ ((uint16_t)((((uint64_t)(pcp)) & 0x7LLU) << 13) | \
+ ((((uint64_t)(dei)) & 0x1LLU) << 12) | \
+ (((uint64_t)(vid)) & 0xFFFLLU)) \
+
+struct encap_vlan_data {
+ struct ether_hdr ether;
+ struct vlan_hdr vlan;
+} __attribute__((__packed__));
+
+struct encap_qinq_data {
+ struct ether_hdr ether;
+ struct vlan_hdr svlan;
+ struct vlan_hdr cvlan;
+} __attribute__((__packed__));
+
+#define ETHER_TYPE_MPLS_UNICAST 0x8847
+
+#define ETHER_TYPE_MPLS_MULTICAST 0x8848
+
+#define MPLS(label, tc, s, ttl) \
+ ((uint32_t)(((((uint64_t)(label)) & 0xFFFFFLLU) << 12) |\
+ ((((uint64_t)(tc)) & 0x7LLU) << 9) | \
+ ((((uint64_t)(s)) & 0x1LLU) << 8) | \
+ (((uint64_t)(ttl)) & 0xFFLLU)))
+
+struct encap_mpls_data {
+ struct ether_hdr ether;
+ uint32_t mpls[RTE_TABLE_ACTION_MPLS_LABELS_MAX];
+ uint32_t mpls_count;
+} __attribute__((__packed__));
+
+#define ETHER_TYPE_PPPOE_SESSION 0x8864
+
+#define PPP_PROTOCOL_IP 0x0021
+
+struct pppoe_ppp_hdr {
+ uint16_t ver_type_code;
+ uint16_t session_id;
+ uint16_t length;
+ uint16_t protocol;
+} __attribute__((__packed__));
+
+struct encap_pppoe_data {
+ struct ether_hdr ether;
+ struct pppoe_ppp_hdr pppoe_ppp;
+} __attribute__((__packed__));
+
+static size_t
+encap_data_size(struct rte_table_action_encap_config *encap)
+{
+ switch (encap->encap_mask) {
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_ETHER:
+ return sizeof(struct encap_ether_data);
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_VLAN:
+ return sizeof(struct encap_vlan_data);
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_QINQ:
+ return sizeof(struct encap_qinq_data);
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS:
+ return sizeof(struct encap_mpls_data);
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE:
+ return sizeof(struct encap_pppoe_data);
+
+ default:
+ return 0;
+ }
+}
+
+static int
+encap_apply_check(struct rte_table_action_encap_params *p,
+ struct rte_table_action_encap_config *cfg)
+{
+ if ((encap_valid(p->type) == 0) ||
+ ((cfg->encap_mask & (1LLU << p->type)) == 0))
+ return -EINVAL;
+
+ switch (p->type) {
+ case RTE_TABLE_ACTION_ENCAP_ETHER:
+ return 0;
+
+ case RTE_TABLE_ACTION_ENCAP_VLAN:
+ return 0;
+
+ case RTE_TABLE_ACTION_ENCAP_QINQ:
+ return 0;
+
+ case RTE_TABLE_ACTION_ENCAP_MPLS:
+ if ((p->mpls.mpls_count == 0) ||
+ (p->mpls.mpls_count > RTE_TABLE_ACTION_MPLS_LABELS_MAX))
+ return -EINVAL;
+
+ return 0;
+
+ case RTE_TABLE_ACTION_ENCAP_PPPOE:
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+encap_ether_apply(void *data,
+ struct rte_table_action_encap_params *p,
+ struct rte_table_action_common_config *common_cfg)
+{
+ struct encap_ether_data *d = data;
+ uint16_t ethertype = (common_cfg->ip_version) ?
+ ETHER_TYPE_IPv4 :
+ ETHER_TYPE_IPv6;
+
+ /* Ethernet */
+ ether_addr_copy(&p->ether.ether.da, &d->ether.d_addr);
+ ether_addr_copy(&p->ether.ether.sa, &d->ether.s_addr);
+ d->ether.ether_type = rte_htons(ethertype);
+
+ return 0;
+}
+
+static int
+encap_vlan_apply(void *data,
+ struct rte_table_action_encap_params *p,
+ struct rte_table_action_common_config *common_cfg)
+{
+ struct encap_vlan_data *d = data;
+ uint16_t ethertype = (common_cfg->ip_version) ?
+ ETHER_TYPE_IPv4 :
+ ETHER_TYPE_IPv6;
+
+ /* Ethernet */
+ ether_addr_copy(&p->vlan.ether.da, &d->ether.d_addr);
+ ether_addr_copy(&p->vlan.ether.sa, &d->ether.s_addr);
+ d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN);
+
+ /* VLAN */
+ d->vlan.vlan_tci = rte_htons(VLAN(p->vlan.vlan.pcp,
+ p->vlan.vlan.dei,
+ p->vlan.vlan.vid));
+ d->vlan.eth_proto = rte_htons(ethertype);
+
+ return 0;
+}
+
+static int
+encap_qinq_apply(void *data,
+ struct rte_table_action_encap_params *p,
+ struct rte_table_action_common_config *common_cfg)
+{
+ struct encap_qinq_data *d = data;
+ uint16_t ethertype = (common_cfg->ip_version) ?
+ ETHER_TYPE_IPv4 :
+ ETHER_TYPE_IPv6;
+
+ /* Ethernet */
+ ether_addr_copy(&p->qinq.ether.da, &d->ether.d_addr);
+ ether_addr_copy(&p->qinq.ether.sa, &d->ether.s_addr);
+ d->ether.ether_type = rte_htons(ETHER_TYPE_QINQ);
+
+ /* SVLAN */
+ d->svlan.vlan_tci = rte_htons(VLAN(p->qinq.svlan.pcp,
+ p->qinq.svlan.dei,
+ p->qinq.svlan.vid));
+ d->svlan.eth_proto = rte_htons(ETHER_TYPE_VLAN);
+
+ /* CVLAN */
+ d->cvlan.vlan_tci = rte_htons(VLAN(p->qinq.cvlan.pcp,
+ p->qinq.cvlan.dei,
+ p->qinq.cvlan.vid));
+ d->cvlan.eth_proto = rte_htons(ethertype);
+
+ return 0;
+}
+
+static int
+encap_mpls_apply(void *data,
+ struct rte_table_action_encap_params *p)
+{
+ struct encap_mpls_data *d = data;
+ uint16_t ethertype = (p->mpls.unicast) ?
+ ETHER_TYPE_MPLS_UNICAST :
+ ETHER_TYPE_MPLS_MULTICAST;
+ uint32_t i;
+
+ /* Ethernet */
+ ether_addr_copy(&p->mpls.ether.da, &d->ether.d_addr);
+ ether_addr_copy(&p->mpls.ether.sa, &d->ether.s_addr);
+ d->ether.ether_type = rte_htons(ethertype);
+
+ /* MPLS */
+ for (i = 0; i < p->mpls.mpls_count - 1; i++)
+ d->mpls[i] = rte_htonl(MPLS(p->mpls.mpls[i].label,
+ p->mpls.mpls[i].tc,
+ 0,
+ p->mpls.mpls[i].ttl));
+
+ d->mpls[i] = rte_htonl(MPLS(p->mpls.mpls[i].label,
+ p->mpls.mpls[i].tc,
+ 1,
+ p->mpls.mpls[i].ttl));
+
+ d->mpls_count = p->mpls.mpls_count;
+ return 0;
+}
+
+static int
+encap_pppoe_apply(void *data,
+ struct rte_table_action_encap_params *p)
+{
+ struct encap_pppoe_data *d = data;
+
+ /* Ethernet */
+ ether_addr_copy(&p->pppoe.ether.da, &d->ether.d_addr);
+ ether_addr_copy(&p->pppoe.ether.sa, &d->ether.s_addr);
+ d->ether.ether_type = rte_htons(ETHER_TYPE_PPPOE_SESSION);
+
+ /* PPPoE and PPP*/
+ d->pppoe_ppp.ver_type_code = rte_htons(0x1100);
+ d->pppoe_ppp.session_id = rte_htons(p->pppoe.pppoe.session_id);
+ d->pppoe_ppp.length = 0; /* not pre-computed */
+ d->pppoe_ppp.protocol = rte_htons(PPP_PROTOCOL_IP);
+
+ return 0;
+}
+
+static int
+encap_apply(void *data,
+ struct rte_table_action_encap_params *p,
+ struct rte_table_action_encap_config *cfg,
+ struct rte_table_action_common_config *common_cfg)
+{
+ int status;
+
+ /* Check input arguments */
+ status = encap_apply_check(p, cfg);
+ if (status)
+ return status;
+
+ switch (p->type) {
+ case RTE_TABLE_ACTION_ENCAP_ETHER:
+ return encap_ether_apply(data, p, common_cfg);
+
+ case RTE_TABLE_ACTION_ENCAP_VLAN:
+ return encap_vlan_apply(data, p, common_cfg);
+
+ case RTE_TABLE_ACTION_ENCAP_QINQ:
+ return encap_qinq_apply(data, p, common_cfg);
+
+ case RTE_TABLE_ACTION_ENCAP_MPLS:
+ return encap_mpls_apply(data, p);
+
+ case RTE_TABLE_ACTION_ENCAP_PPPOE:
+ return encap_pppoe_apply(data, p);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static __rte_always_inline void *
+encap(void *dst, const void *src, size_t n)
+{
+ dst = ((uint8_t *) dst) - n;
+ return rte_memcpy(dst, src, n);
+}
+
+static __rte_always_inline void
+pkt_work_encap(struct rte_mbuf *mbuf,
+ void *data,
+ struct rte_table_action_encap_config *cfg,
+ void *ip,
+ uint16_t total_length,
+ uint32_t ip_offset)
+{
+ switch (cfg->encap_mask) {
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_ETHER:
+ encap(ip, data, sizeof(struct encap_ether_data));
+ mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) +
+ sizeof(struct encap_ether_data));
+ mbuf->pkt_len = mbuf->data_len = total_length +
+ sizeof(struct encap_ether_data);
+ break;
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_VLAN:
+ encap(ip, data, sizeof(struct encap_vlan_data));
+ mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) +
+ sizeof(struct encap_vlan_data));
+ mbuf->pkt_len = mbuf->data_len = total_length +
+ sizeof(struct encap_vlan_data);
+ break;
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_QINQ:
+ encap(ip, data, sizeof(struct encap_qinq_data));
+ mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) +
+ sizeof(struct encap_qinq_data));
+ mbuf->pkt_len = mbuf->data_len = total_length +
+ sizeof(struct encap_qinq_data);
+ break;
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS:
+ {
+ struct encap_mpls_data *mpls = data;
+ size_t size = sizeof(struct ether_hdr) +
+ mpls->mpls_count * 4;
+
+ encap(ip, data, size);
+ mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) + size);
+ mbuf->pkt_len = mbuf->data_len = total_length + size;
+ break;
+ }
+
+ case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE:
+ {
+ struct encap_pppoe_data *pppoe =
+ encap(ip, data, sizeof(struct encap_pppoe_data));
+ pppoe->pppoe_ppp.length = rte_htons(total_length + 2);
+ mbuf->data_off = ip_offset - (sizeof(struct rte_mbuf) +
+ sizeof(struct encap_pppoe_data));
+ mbuf->pkt_len = mbuf->data_len = total_length +
+ sizeof(struct encap_pppoe_data);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+/**
+ * RTE_TABLE_ACTION_NAT
+ */
+static int
+nat_cfg_check(struct rte_table_action_nat_config *nat)
+{
+ if ((nat->proto != 0x06) &&
+ (nat->proto != 0x11))
+ return -ENOTSUP;
+
+ return 0;
+}
+
+struct nat_ipv4_data {
+ uint32_t addr;
+ uint16_t port;
+} __attribute__((__packed__));
+
+struct nat_ipv6_data {
+ uint8_t addr[16];
+ uint16_t port;
+} __attribute__((__packed__));
+
+static size_t
+nat_data_size(struct rte_table_action_nat_config *nat __rte_unused,
+ struct rte_table_action_common_config *common)
+{
+ int ip_version = common->ip_version;
+
+ return (ip_version) ?
+ sizeof(struct nat_ipv4_data) :
+ sizeof(struct nat_ipv6_data);
+}
+
+static int
+nat_apply_check(struct rte_table_action_nat_params *p,
+ struct rte_table_action_common_config *cfg)
+{
+ if ((p->ip_version && (cfg->ip_version == 0)) ||
+ ((p->ip_version == 0) && cfg->ip_version))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+nat_apply(void *data,
+ struct rte_table_action_nat_params *p,
+ struct rte_table_action_common_config *cfg)
+{
+ int status;
+
+ /* Check input arguments */
+ status = nat_apply_check(p, cfg);
+ if (status)
+ return status;
+
+ /* Apply */
+ if (p->ip_version) {
+ struct nat_ipv4_data *d = data;
+
+ d->addr = rte_htonl(p->addr.ipv4);
+ d->port = rte_htons(p->port);
+ } else {
+ struct nat_ipv6_data *d = data;
+
+ memcpy(d->addr, p->addr.ipv6, sizeof(d->addr));
+ d->port = rte_htons(p->port);
+ }
+
+ return 0;
+}
+
+static __rte_always_inline uint16_t
+nat_ipv4_checksum_update(uint16_t cksum0,
+ uint32_t ip0,
+ uint32_t ip1)
+{
+ int32_t cksum1;
+
+ cksum1 = cksum0;
+ cksum1 = ~cksum1 & 0xFFFF;
+
+ /* Subtract ip0 (one's complement logic) */
+ cksum1 -= (ip0 >> 16) + (ip0 & 0xFFFF);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ /* Add ip1 (one's complement logic) */
+ cksum1 += (ip1 >> 16) + (ip1 & 0xFFFF);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ return (uint16_t)(~cksum1);
+}
+
+static __rte_always_inline uint16_t
+nat_ipv4_tcp_udp_checksum_update(uint16_t cksum0,
+ uint32_t ip0,
+ uint32_t ip1,
+ uint16_t port0,
+ uint16_t port1)
+{
+ int32_t cksum1;
+
+ cksum1 = cksum0;
+ cksum1 = ~cksum1 & 0xFFFF;
+
+ /* Subtract ip0 and port 0 (one's complement logic) */
+ cksum1 -= (ip0 >> 16) + (ip0 & 0xFFFF) + port0;
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ /* Add ip1 and port1 (one's complement logic) */
+ cksum1 += (ip1 >> 16) + (ip1 & 0xFFFF) + port1;
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ return (uint16_t)(~cksum1);
+}
+
+static __rte_always_inline uint16_t
+nat_ipv6_tcp_udp_checksum_update(uint16_t cksum0,
+ uint16_t *ip0,
+ uint16_t *ip1,
+ uint16_t port0,
+ uint16_t port1)
+{
+ int32_t cksum1;
+
+ cksum1 = cksum0;
+ cksum1 = ~cksum1 & 0xFFFF;
+
+ /* Subtract ip0 and port 0 (one's complement logic) */
+ cksum1 -= ip0[0] + ip0[1] + ip0[2] + ip0[3] +
+ ip0[4] + ip0[5] + ip0[6] + ip0[7] + port0;
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ /* Add ip1 and port1 (one's complement logic) */
+ cksum1 += ip1[0] + ip1[1] + ip1[2] + ip1[3] +
+ ip1[4] + ip1[5] + ip1[6] + ip1[7] + port1;
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+ cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+ return (uint16_t)(~cksum1);
+}
+
+static __rte_always_inline void
+pkt_ipv4_work_nat(struct ipv4_hdr *ip,
+ struct nat_ipv4_data *data,
+ struct rte_table_action_nat_config *cfg)
+{
+ if (cfg->source_nat) {
+ if (cfg->proto == 0x6) {
+ struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1];
+ uint16_t ip_cksum, tcp_cksum;
+
+ ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum,
+ ip->src_addr,
+ data->addr);
+
+ tcp_cksum = nat_ipv4_tcp_udp_checksum_update(tcp->cksum,
+ ip->src_addr,
+ data->addr,
+ tcp->src_port,
+ data->port);
+
+ ip->src_addr = data->addr;
+ ip->hdr_checksum = ip_cksum;
+ tcp->src_port = data->port;
+ tcp->cksum = tcp_cksum;
+ } else {
+ struct udp_hdr *udp = (struct udp_hdr *) &ip[1];
+ uint16_t ip_cksum, udp_cksum;
+
+ ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum,
+ ip->src_addr,
+ data->addr);
+
+ udp_cksum = nat_ipv4_tcp_udp_checksum_update(udp->dgram_cksum,
+ ip->src_addr,
+ data->addr,
+ udp->src_port,
+ data->port);
+
+ ip->src_addr = data->addr;
+ ip->hdr_checksum = ip_cksum;
+ udp->src_port = data->port;
+ if (udp->dgram_cksum)
+ udp->dgram_cksum = udp_cksum;
+ }
+ } else {
+ if (cfg->proto == 0x6) {
+ struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1];
+ uint16_t ip_cksum, tcp_cksum;
+
+ ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum,
+ ip->dst_addr,
+ data->addr);
+
+ tcp_cksum = nat_ipv4_tcp_udp_checksum_update(tcp->cksum,
+ ip->dst_addr,
+ data->addr,
+ tcp->dst_port,
+ data->port);
+
+ ip->dst_addr = data->addr;
+ ip->hdr_checksum = ip_cksum;
+ tcp->dst_port = data->port;
+ tcp->cksum = tcp_cksum;
+ } else {
+ struct udp_hdr *udp = (struct udp_hdr *) &ip[1];
+ uint16_t ip_cksum, udp_cksum;
+
+ ip_cksum = nat_ipv4_checksum_update(ip->hdr_checksum,
+ ip->dst_addr,
+ data->addr);
+
+ udp_cksum = nat_ipv4_tcp_udp_checksum_update(udp->dgram_cksum,
+ ip->dst_addr,
+ data->addr,
+ udp->dst_port,
+ data->port);
+
+ ip->dst_addr = data->addr;
+ ip->hdr_checksum = ip_cksum;
+ udp->dst_port = data->port;
+ if (udp->dgram_cksum)
+ udp->dgram_cksum = udp_cksum;
+ }
+ }
+}
+
+static __rte_always_inline void
+pkt_ipv6_work_nat(struct ipv6_hdr *ip,
+ struct nat_ipv6_data *data,
+ struct rte_table_action_nat_config *cfg)
+{
+ if (cfg->source_nat) {
+ if (cfg->proto == 0x6) {
+ struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1];
+ uint16_t tcp_cksum;
+
+ tcp_cksum = nat_ipv6_tcp_udp_checksum_update(tcp->cksum,
+ (uint16_t *)ip->src_addr,
+ (uint16_t *)data->addr,
+ tcp->src_port,
+ data->port);
+
+ rte_memcpy(ip->src_addr, data->addr, 16);
+ tcp->src_port = data->port;
+ tcp->cksum = tcp_cksum;
+ } else {
+ struct udp_hdr *udp = (struct udp_hdr *) &ip[1];
+ uint16_t udp_cksum;
+
+ udp_cksum = nat_ipv6_tcp_udp_checksum_update(udp->dgram_cksum,
+ (uint16_t *)ip->src_addr,
+ (uint16_t *)data->addr,
+ udp->src_port,
+ data->port);
+
+ rte_memcpy(ip->src_addr, data->addr, 16);
+ udp->src_port = data->port;
+ udp->dgram_cksum = udp_cksum;
+ }
+ } else {
+ if (cfg->proto == 0x6) {
+ struct tcp_hdr *tcp = (struct tcp_hdr *) &ip[1];
+ uint16_t tcp_cksum;
+
+ tcp_cksum = nat_ipv6_tcp_udp_checksum_update(tcp->cksum,
+ (uint16_t *)ip->dst_addr,
+ (uint16_t *)data->addr,
+ tcp->dst_port,
+ data->port);
+
+ rte_memcpy(ip->dst_addr, data->addr, 16);
+ tcp->dst_port = data->port;
+ tcp->cksum = tcp_cksum;
+ } else {
+ struct udp_hdr *udp = (struct udp_hdr *) &ip[1];
+ uint16_t udp_cksum;
+
+ udp_cksum = nat_ipv6_tcp_udp_checksum_update(udp->dgram_cksum,
+ (uint16_t *)ip->dst_addr,
+ (uint16_t *)data->addr,
+ udp->dst_port,
+ data->port);
+
+ rte_memcpy(ip->dst_addr, data->addr, 16);
+ udp->dst_port = data->port;
+ udp->dgram_cksum = udp_cksum;
+ }
+ }
+}
+
+/**
+ * RTE_TABLE_ACTION_TTL
+ */
+static int
+ttl_cfg_check(struct rte_table_action_ttl_config *ttl)
+{
+ if (ttl->drop == 0)
+ return -ENOTSUP;
+
+ return 0;
+}
+
+struct ttl_data {
+ uint32_t n_packets;
+} __attribute__((__packed__));
+
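+/*
+ * Layout of ttl_data.n_packets: bit 0 stores the per-rule TTL decrement flag,
+ * while bits 1..31 count the packets whose updated TTL/hop limit reached zero.
+ */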
+#define TTL_INIT(data, decrement) \
+ ((data)->n_packets = (decrement) ? 1 : 0)
+
+#define TTL_DEC_GET(data) \
+ ((uint8_t)((data)->n_packets & 1))
+
+#define TTL_STATS_RESET(data) \
+ ((data)->n_packets = ((data)->n_packets & 1))
+
+#define TTL_STATS_READ(data) \
+ ((data)->n_packets >> 1)
+
+#define TTL_STATS_ADD(data, value) \
+ ((data)->n_packets = \
+ (((((data)->n_packets >> 1) + (value)) << 1) | \
+ ((data)->n_packets & 1)))
+
+static int
+ttl_apply(void *data,
+ struct rte_table_action_ttl_params *p)
+{
+ struct ttl_data *d = data;
+
+ TTL_INIT(d, p->decrement);
+
+ return 0;
+}
+
+static __rte_always_inline uint64_t
+pkt_ipv4_work_ttl(struct ipv4_hdr *ip,
+ struct ttl_data *data)
+{
+ uint32_t drop;
+ uint16_t cksum = ip->hdr_checksum;
+ uint8_t ttl = ip->time_to_live;
+ uint8_t ttl_diff = TTL_DEC_GET(data);
+
+ cksum += ttl_diff;
+ ttl -= ttl_diff;
+
+ ip->hdr_checksum = cksum;
+ ip->time_to_live = ttl;
+
+ drop = (ttl == 0) ? 1 : 0;
+ TTL_STATS_ADD(data, drop);
+
+ return drop;
+}
+
+static __rte_always_inline uint64_t
+pkt_ipv6_work_ttl(struct ipv6_hdr *ip,
+ struct ttl_data *data)
+{
+ uint32_t drop;
+ uint8_t ttl = ip->hop_limits;
+ uint8_t ttl_diff = TTL_DEC_GET(data);
+
+ ttl -= ttl_diff;
+
+ ip->hop_limits = ttl;
+
+ drop = (ttl == 0) ? 1 : 0;
+ TTL_STATS_ADD(data, drop);
+
+ return drop;
+}
+
+/**
+ * RTE_TABLE_ACTION_STATS
+ */
+static int
+stats_cfg_check(struct rte_table_action_stats_config *stats)
+{
+ if ((stats->n_packets_enabled == 0) && (stats->n_bytes_enabled == 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+struct stats_data {
+ uint64_t n_packets;
+ uint64_t n_bytes;
+} __attribute__((__packed__));
+
+static int
+stats_apply(struct stats_data *data,
+ struct rte_table_action_stats_params *p)
+{
+ data->n_packets = p->n_packets;
+ data->n_bytes = p->n_bytes;
+
+ return 0;
+}
+
+static __rte_always_inline void
+pkt_work_stats(struct stats_data *data,
+ uint16_t total_length)
+{
+ data->n_packets++;
+ data->n_bytes += total_length;
+}
+
+/**
+ * RTE_TABLE_ACTION_TIME
+ */
+struct time_data {
+ uint64_t time;
+} __attribute__((__packed__));
+
+static int
+time_apply(struct time_data *data,
+ struct rte_table_action_time_params *p)
+{
+ data->time = p->time;
+ return 0;
+}
+
+static __rte_always_inline void
+pkt_work_time(struct time_data *data,
+ uint64_t time)
+{
+ data->time = time;
+}
+
+/**
+ * Action profile
+ */
+static int
+action_valid(enum rte_table_action_type action)
+{
+ switch (action) {
+ case RTE_TABLE_ACTION_FWD:
+ case RTE_TABLE_ACTION_LB:
+ case RTE_TABLE_ACTION_MTR:
+ case RTE_TABLE_ACTION_TM:
+ case RTE_TABLE_ACTION_ENCAP:
+ case RTE_TABLE_ACTION_NAT:
+ case RTE_TABLE_ACTION_TTL:
+ case RTE_TABLE_ACTION_STATS:
+ case RTE_TABLE_ACTION_TIME:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+
+#define RTE_TABLE_ACTION_MAX 64
+
+struct ap_config {
+ uint64_t action_mask;
+ struct rte_table_action_common_config common;
+ struct rte_table_action_lb_config lb;
+ struct rte_table_action_mtr_config mtr;
+ struct rte_table_action_tm_config tm;
+ struct rte_table_action_encap_config encap;
+ struct rte_table_action_nat_config nat;
+ struct rte_table_action_ttl_config ttl;
+ struct rte_table_action_stats_config stats;
+};
+
+static size_t
+action_cfg_size(enum rte_table_action_type action)
+{
+ switch (action) {
+ case RTE_TABLE_ACTION_LB:
+ return sizeof(struct rte_table_action_lb_config);
+ case RTE_TABLE_ACTION_MTR:
+ return sizeof(struct rte_table_action_mtr_config);
+ case RTE_TABLE_ACTION_TM:
+ return sizeof(struct rte_table_action_tm_config);
+ case RTE_TABLE_ACTION_ENCAP:
+ return sizeof(struct rte_table_action_encap_config);
+ case RTE_TABLE_ACTION_NAT:
+ return sizeof(struct rte_table_action_nat_config);
+ case RTE_TABLE_ACTION_TTL:
+ return sizeof(struct rte_table_action_ttl_config);
+ case RTE_TABLE_ACTION_STATS:
+ return sizeof(struct rte_table_action_stats_config);
+ default:
+ return 0;
+ }
+}
+
+static void *
+action_cfg_get(struct ap_config *ap_config,
+ enum rte_table_action_type type)
+{
+ switch (type) {
+ case RTE_TABLE_ACTION_LB:
+ return &ap_config->lb;
+
+ case RTE_TABLE_ACTION_MTR:
+ return &ap_config->mtr;
+
+ case RTE_TABLE_ACTION_TM:
+ return &ap_config->tm;
+
+ case RTE_TABLE_ACTION_ENCAP:
+ return &ap_config->encap;
+
+ case RTE_TABLE_ACTION_NAT:
+ return &ap_config->nat;
+
+ case RTE_TABLE_ACTION_TTL:
+ return &ap_config->ttl;
+
+ case RTE_TABLE_ACTION_STATS:
+ return &ap_config->stats;
+
+ default:
+ return NULL;
+ }
+}
+
+static void
+action_cfg_set(struct ap_config *ap_config,
+ enum rte_table_action_type type,
+ void *action_cfg)
+{
+ void *dst = action_cfg_get(ap_config, type);
+
+ if (dst)
+ memcpy(dst, action_cfg, action_cfg_size(type));
+
+ ap_config->action_mask |= 1LLU << type;
+}
+
+struct ap_data {
+ size_t offset[RTE_TABLE_ACTION_MAX];
+ size_t total_size;
+};
+
+static size_t
+action_data_size(enum rte_table_action_type action,
+ struct ap_config *ap_config)
+{
+ switch (action) {
+ case RTE_TABLE_ACTION_FWD:
+ return sizeof(struct fwd_data);
+
+ case RTE_TABLE_ACTION_LB:
+ return sizeof(struct lb_data);
+
+ case RTE_TABLE_ACTION_MTR:
+ return mtr_data_size(&ap_config->mtr);
+
+ case RTE_TABLE_ACTION_TM:
+ return sizeof(struct tm_data);
+
+ case RTE_TABLE_ACTION_ENCAP:
+ return encap_data_size(&ap_config->encap);
+
+ case RTE_TABLE_ACTION_NAT:
+ return nat_data_size(&ap_config->nat,
+ &ap_config->common);
+
+ case RTE_TABLE_ACTION_TTL:
+ return sizeof(struct ttl_data);
+
+ case RTE_TABLE_ACTION_STATS:
+ return sizeof(struct stats_data);
+
+ case RTE_TABLE_ACTION_TIME:
+ return sizeof(struct time_data);
+
+ default:
+ return 0;
+ }
+}
+
+
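+/*
+ * Compute the byte offset of each enabled action's data within the rule action
+ * data area; the per-action data blocks are laid out back to back in the order
+ * given by enum rte_table_action_type.
+ */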
+static void
+action_data_offset_set(struct ap_data *ap_data,
+ struct ap_config *ap_config)
+{
+ uint64_t action_mask = ap_config->action_mask;
+ size_t offset;
+ uint32_t action;
+
+ memset(ap_data->offset, 0, sizeof(ap_data->offset));
+
+ offset = 0;
+ for (action = 0; action < RTE_TABLE_ACTION_MAX; action++)
+ if (action_mask & (1LLU << action)) {
+ ap_data->offset[action] = offset;
+ offset += action_data_size((enum rte_table_action_type)action,
+ ap_config);
+ }
+
+ ap_data->total_size = offset;
+}
+
+struct rte_table_action_profile {
+ struct ap_config cfg;
+ struct ap_data data;
+ int frozen;
+};
+
+struct rte_table_action_profile *
+rte_table_action_profile_create(struct rte_table_action_common_config *common)
+{
+ struct rte_table_action_profile *ap;
+
+ /* Check input arguments */
+ if (common == NULL)
+ return NULL;
+
+ /* Memory allocation */
+ ap = calloc(1, sizeof(struct rte_table_action_profile));
+ if (ap == NULL)
+ return NULL;
+
+ /* Initialization */
+ memcpy(&ap->cfg.common, common, sizeof(*common));
+
+ return ap;
+}
+
+
+int
+rte_table_action_profile_action_register(struct rte_table_action_profile *profile,
+ enum rte_table_action_type type,
+ void *action_config)
+{
+ int status;
+
+ /* Check input arguments */
+ if ((profile == NULL) ||
+ profile->frozen ||
+ (action_valid(type) == 0) ||
+ (profile->cfg.action_mask & (1LLU << type)) ||
+ ((action_cfg_size(type) == 0) && action_config) ||
+ (action_cfg_size(type) && (action_config == NULL)))
+ return -EINVAL;
+
+ switch (type) {
+ case RTE_TABLE_ACTION_LB:
+ status = lb_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_MTR:
+ status = mtr_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_TM:
+ status = tm_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_ENCAP:
+ status = encap_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_NAT:
+ status = nat_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_TTL:
+ status = ttl_cfg_check(action_config);
+ break;
+
+ case RTE_TABLE_ACTION_STATS:
+ status = stats_cfg_check(action_config);
+ break;
+
+ default:
+ status = 0;
+ break;
+ }
+
+ if (status)
+ return status;
+
+ /* Action enable */
+ action_cfg_set(&profile->cfg, type, action_config);
+
+ return 0;
+}
+
+int
+rte_table_action_profile_freeze(struct rte_table_action_profile *profile)
+{
+ if (profile->frozen)
+ return -EBUSY;
+
+ profile->cfg.action_mask |= 1LLU << RTE_TABLE_ACTION_FWD;
+ action_data_offset_set(&profile->data, &profile->cfg);
+ profile->frozen = 1;
+
+ return 0;
+}
+
+int
+rte_table_action_profile_free(struct rte_table_action_profile *profile)
+{
+ if (profile == NULL)
+ return 0;
+
+ free(profile);
+ return 0;
+}
+
+/**
+ * Action
+ */
+#define METER_PROFILES_MAX 32
+
+struct rte_table_action {
+ struct ap_config cfg;
+ struct ap_data data;
+ struct dscp_table_data dscp_table;
+ struct meter_profile_data mp[METER_PROFILES_MAX];
+};
+
+struct rte_table_action *
+rte_table_action_create(struct rte_table_action_profile *profile,
+ uint32_t socket_id)
+{
+ struct rte_table_action *action;
+
+ /* Check input arguments */
+ if ((profile == NULL) ||
+ (profile->frozen == 0))
+ return NULL;
+
+ /* Memory allocation */
+ action = rte_zmalloc_socket(NULL,
+ sizeof(struct rte_table_action),
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (action == NULL)
+ return NULL;
+
+ /* Initialization */
+ memcpy(&action->cfg, &profile->cfg, sizeof(profile->cfg));
+ memcpy(&action->data, &profile->data, sizeof(profile->data));
+
+ return action;
+}
+
+static __rte_always_inline void *
+action_data_get(void *data,
+ struct rte_table_action *action,
+ enum rte_table_action_type type)
+{
+ size_t offset = action->data.offset[type];
+ uint8_t *data_bytes = data;
+
+ return &data_bytes[offset];
+}
+
+int
+rte_table_action_apply(struct rte_table_action *action,
+ void *data,
+ enum rte_table_action_type type,
+ void *action_params)
+{
+ void *action_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ (data == NULL) ||
+ (action_valid(type) == 0) ||
+ ((action->cfg.action_mask & (1LLU << type)) == 0) ||
+ (action_params == NULL))
+ return -EINVAL;
+
+ /* Data update */
+ action_data = action_data_get(data, action, type);
+
+ switch (type) {
+ case RTE_TABLE_ACTION_FWD:
+ return fwd_apply(action_data,
+ action_params);
+
+ case RTE_TABLE_ACTION_LB:
+ return lb_apply(action_data,
+ action_params);
+
+ case RTE_TABLE_ACTION_MTR:
+ return mtr_apply(action_data,
+ action_params,
+ &action->cfg.mtr,
+ action->mp,
+ RTE_DIM(action->mp));
+
+ case RTE_TABLE_ACTION_TM:
+ return tm_apply(action_data,
+ action_params,
+ &action->cfg.tm);
+
+ case RTE_TABLE_ACTION_ENCAP:
+ return encap_apply(action_data,
+ action_params,
+ &action->cfg.encap,
+ &action->cfg.common);
+
+ case RTE_TABLE_ACTION_NAT:
+ return nat_apply(action_data,
+ action_params,
+ &action->cfg.common);
+
+ case RTE_TABLE_ACTION_TTL:
+ return ttl_apply(action_data,
+ action_params);
+
+ case RTE_TABLE_ACTION_STATS:
+ return stats_apply(action_data,
+ action_params);
+
+ case RTE_TABLE_ACTION_TIME:
+ return time_apply(action_data,
+ action_params);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_table_action_dscp_table_update(struct rte_table_action *action,
+ uint64_t dscp_mask,
+ struct rte_table_action_dscp_table *table)
+{
+ uint32_t i;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask & ((1LLU << RTE_TABLE_ACTION_MTR) |
+ (1LLU << RTE_TABLE_ACTION_TM))) == 0) ||
+ (dscp_mask == 0) ||
+ (table == NULL))
+ return -EINVAL;
+
+ for (i = 0; i < RTE_DIM(table->entry); i++) {
+ struct dscp_table_entry_data *data =
+ &action->dscp_table.entry[i];
+ struct rte_table_action_dscp_table_entry *entry =
+ &table->entry[i];
+ uint16_t queue_tc_color =
+ MBUF_SCHED_QUEUE_TC_COLOR(entry->tc_queue_id,
+ entry->tc_id,
+ entry->color);
+
+ if ((dscp_mask & (1LLU << i)) == 0)
+ continue;
+
+ data->color = entry->color;
+ data->tc = entry->tc_id;
+ data->queue_tc_color = queue_tc_color;
+ }
+
+ return 0;
+}
+
+int
+rte_table_action_meter_profile_add(struct rte_table_action *action,
+ uint32_t meter_profile_id,
+ struct rte_table_action_meter_profile *profile)
+{
+ struct meter_profile_data *mp_data;
+ int status;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0) ||
+ (profile == NULL))
+ return -EINVAL;
+
+ if (profile->alg != RTE_TABLE_ACTION_METER_TRTCM)
+ return -ENOTSUP;
+
+ mp_data = meter_profile_data_find(action->mp,
+ RTE_DIM(action->mp),
+ meter_profile_id);
+ if (mp_data)
+ return -EEXIST;
+
+ mp_data = meter_profile_data_find_unused(action->mp,
+ RTE_DIM(action->mp));
+ if (!mp_data)
+ return -ENOSPC;
+
+ /* Install new profile */
+ status = rte_meter_trtcm_profile_config(&mp_data->profile,
+ &profile->trtcm);
+ if (status)
+ return status;
+
+ mp_data->profile_id = meter_profile_id;
+ mp_data->valid = 1;
+
+ return 0;
+}
+
+int
+rte_table_action_meter_profile_delete(struct rte_table_action *action,
+ uint32_t meter_profile_id)
+{
+ struct meter_profile_data *mp_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0))
+ return -EINVAL;
+
+ mp_data = meter_profile_data_find(action->mp,
+ RTE_DIM(action->mp),
+ meter_profile_id);
+ if (!mp_data)
+ return 0;
+
+ /* Uninstall profile */
+ mp_data->valid = 0;
+
+ return 0;
+}
+
+int
+rte_table_action_meter_read(struct rte_table_action *action,
+ void *data,
+ uint32_t tc_mask,
+ struct rte_table_action_mtr_counters *stats,
+ int clear)
+{
+ struct mtr_trtcm_data *mtr_data;
+ uint32_t i;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0) ||
+ (data == NULL) ||
+ (tc_mask > RTE_LEN2MASK(action->cfg.mtr.n_tc, uint32_t)))
+ return -EINVAL;
+
+ mtr_data = action_data_get(data, action, RTE_TABLE_ACTION_MTR);
+
+ /* Read */
+ if (stats) {
+ for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) {
+ struct rte_table_action_mtr_counters_tc *dst =
+ &stats->stats[i];
+ struct mtr_trtcm_data *src = &mtr_data[i];
+
+ if ((tc_mask & (1 << i)) == 0)
+ continue;
+
+ dst->n_packets[e_RTE_METER_GREEN] =
+ mtr_trtcm_data_stats_get(src, e_RTE_METER_GREEN);
+
+ dst->n_packets[e_RTE_METER_YELLOW] =
+ mtr_trtcm_data_stats_get(src, e_RTE_METER_YELLOW);
+
+ dst->n_packets[e_RTE_METER_RED] =
+ mtr_trtcm_data_stats_get(src, e_RTE_METER_RED);
+
+ dst->n_packets_valid = 1;
+ dst->n_bytes_valid = 0;
+ }
+
+ stats->tc_mask = tc_mask;
+ }
+
+ /* Clear */
+ if (clear)
+ for (i = 0; i < RTE_TABLE_ACTION_TC_MAX; i++) {
+ struct mtr_trtcm_data *src = &mtr_data[i];
+
+ if ((tc_mask & (1 << i)) == 0)
+ continue;
+
+ mtr_trtcm_data_stats_reset(src, e_RTE_METER_GREEN);
+ mtr_trtcm_data_stats_reset(src, e_RTE_METER_YELLOW);
+ mtr_trtcm_data_stats_reset(src, e_RTE_METER_RED);
+ }
+
+ return 0;
+}
+
+int
+rte_table_action_ttl_read(struct rte_table_action *action,
+ void *data,
+ struct rte_table_action_ttl_counters *stats,
+ int clear)
+{
+ struct ttl_data *ttl_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask &
+ (1LLU << RTE_TABLE_ACTION_TTL)) == 0) ||
+ (data == NULL))
+ return -EINVAL;
+
+ ttl_data = action_data_get(data, action, RTE_TABLE_ACTION_TTL);
+
+ /* Read */
+ if (stats)
+ stats->n_packets = TTL_STATS_READ(ttl_data);
+
+ /* Clear */
+ if (clear)
+ TTL_STATS_RESET(ttl_data);
+
+ return 0;
+}
+
+int
+rte_table_action_stats_read(struct rte_table_action *action,
+ void *data,
+ struct rte_table_action_stats_counters *stats,
+ int clear)
+{
+ struct stats_data *stats_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask &
+ (1LLU << RTE_TABLE_ACTION_STATS)) == 0) ||
+ (data == NULL))
+ return -EINVAL;
+
+ stats_data = action_data_get(data, action,
+ RTE_TABLE_ACTION_STATS);
+
+ /* Read */
+ if (stats) {
+ stats->n_packets = stats_data->n_packets;
+ stats->n_bytes = stats_data->n_bytes;
+ stats->n_packets_valid = 1;
+ stats->n_bytes_valid = 1;
+ }
+
+ /* Clear */
+ if (clear) {
+ stats_data->n_packets = 0;
+ stats_data->n_bytes = 0;
+ }
+
+ return 0;
+}
+
+int
+rte_table_action_time_read(struct rte_table_action *action,
+ void *data,
+ uint64_t *timestamp)
+{
+ struct time_data *time_data;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ ((action->cfg.action_mask &
+ (1LLU << RTE_TABLE_ACTION_TIME)) == 0) ||
+ (data == NULL) ||
+ (timestamp == NULL))
+ return -EINVAL;
+
+ time_data = action_data_get(data, action, RTE_TABLE_ACTION_TIME);
+
+ /* Read */
+ *timestamp = time_data->time;
+
+ return 0;
+}
+
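+/*
+ * Per-packet action handler: applies every action enabled in the profile to a
+ * single packet and returns a non-zero drop mask when the meter/policer or TTL
+ * action decides the packet must be dropped.
+ */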
+static __rte_always_inline uint64_t
+pkt_work(struct rte_mbuf *mbuf,
+ struct rte_pipeline_table_entry *table_entry,
+ uint64_t time,
+ struct rte_table_action *action,
+ struct ap_config *cfg)
+{
+ uint64_t drop_mask = 0;
+
+ uint32_t ip_offset = action->cfg.common.ip_offset;
+ void *ip = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ip_offset);
+
+ uint32_t dscp;
+ uint16_t total_length;
+
+ if (cfg->common.ip_version) {
+ struct ipv4_hdr *hdr = ip;
+
+ dscp = hdr->type_of_service >> 2;
+ total_length = rte_ntohs(hdr->total_length);
+ } else {
+ struct ipv6_hdr *hdr = ip;
+
+ dscp = (rte_ntohl(hdr->vtc_flow) & 0x0FC00000) >> 22;
+ total_length =
+ rte_ntohs(hdr->payload_len) + sizeof(struct ipv6_hdr);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_LB)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_LB);
+
+ pkt_work_lb(mbuf,
+ data,
+ &cfg->lb);
+ }
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_MTR);
+
+ drop_mask |= pkt_work_mtr(mbuf,
+ data,
+ &action->dscp_table,
+ action->mp,
+ time,
+ dscp,
+ total_length);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TM)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_TM);
+
+ pkt_work_tm(mbuf,
+ data,
+ &action->dscp_table,
+ dscp);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_ENCAP);
+
+ pkt_work_encap(mbuf,
+ data,
+ &cfg->encap,
+ ip,
+ total_length,
+ ip_offset);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_NAT)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_NAT);
+
+ if (cfg->common.ip_version)
+ pkt_ipv4_work_nat(ip, data, &cfg->nat);
+ else
+ pkt_ipv6_work_nat(ip, data, &cfg->nat);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TTL)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_TTL);
+
+ if (cfg->common.ip_version)
+ drop_mask |= pkt_ipv4_work_ttl(ip, data);
+ else
+ drop_mask |= pkt_ipv6_work_ttl(ip, data);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_STATS)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_STATS);
+
+ pkt_work_stats(data, total_length);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TIME)) {
+ void *data =
+ action_data_get(table_entry, action, RTE_TABLE_ACTION_TIME);
+
+ pkt_work_time(data, time);
+ }
+
+ return drop_mask;
+}
+
+static __rte_always_inline uint64_t
+pkt4_work(struct rte_mbuf **mbufs,
+ struct rte_pipeline_table_entry **table_entries,
+ uint64_t time,
+ struct rte_table_action *action,
+ struct ap_config *cfg)
+{
+ uint64_t drop_mask0 = 0;
+ uint64_t drop_mask1 = 0;
+ uint64_t drop_mask2 = 0;
+ uint64_t drop_mask3 = 0;
+
+ struct rte_mbuf *mbuf0 = mbufs[0];
+ struct rte_mbuf *mbuf1 = mbufs[1];
+ struct rte_mbuf *mbuf2 = mbufs[2];
+ struct rte_mbuf *mbuf3 = mbufs[3];
+
+ struct rte_pipeline_table_entry *table_entry0 = table_entries[0];
+ struct rte_pipeline_table_entry *table_entry1 = table_entries[1];
+ struct rte_pipeline_table_entry *table_entry2 = table_entries[2];
+ struct rte_pipeline_table_entry *table_entry3 = table_entries[3];
+
+ uint32_t ip_offset = action->cfg.common.ip_offset;
+ void *ip0 = RTE_MBUF_METADATA_UINT32_PTR(mbuf0, ip_offset);
+ void *ip1 = RTE_MBUF_METADATA_UINT32_PTR(mbuf1, ip_offset);
+ void *ip2 = RTE_MBUF_METADATA_UINT32_PTR(mbuf2, ip_offset);
+ void *ip3 = RTE_MBUF_METADATA_UINT32_PTR(mbuf3, ip_offset);
+
+ uint32_t dscp0, dscp1, dscp2, dscp3;
+ uint16_t total_length0, total_length1, total_length2, total_length3;
+
+ if (cfg->common.ip_version) {
+ struct ipv4_hdr *hdr0 = ip0;
+ struct ipv4_hdr *hdr1 = ip1;
+ struct ipv4_hdr *hdr2 = ip2;
+ struct ipv4_hdr *hdr3 = ip3;
+
+ dscp0 = hdr0->type_of_service >> 2;
+ dscp1 = hdr1->type_of_service >> 2;
+ dscp2 = hdr2->type_of_service >> 2;
+ dscp3 = hdr3->type_of_service >> 2;
+
+ total_length0 = rte_ntohs(hdr0->total_length);
+ total_length1 = rte_ntohs(hdr1->total_length);
+ total_length2 = rte_ntohs(hdr2->total_length);
+ total_length3 = rte_ntohs(hdr3->total_length);
+ } else {
+ struct ipv6_hdr *hdr0 = ip0;
+ struct ipv6_hdr *hdr1 = ip1;
+ struct ipv6_hdr *hdr2 = ip2;
+ struct ipv6_hdr *hdr3 = ip3;
+
+ dscp0 = (rte_ntohl(hdr0->vtc_flow) & 0x0FC00000) >> 22;
+ dscp1 = (rte_ntohl(hdr1->vtc_flow) & 0x0FC00000) >> 22;
+ dscp2 = (rte_ntohl(hdr2->vtc_flow) & 0x0FC00000) >> 22;
+ dscp3 = (rte_ntohl(hdr3->vtc_flow) & 0x0FC00000) >> 22;
+
+ total_length0 =
+ rte_ntohs(hdr0->payload_len) + sizeof(struct ipv6_hdr);
+ total_length1 =
+ rte_ntohs(hdr1->payload_len) + sizeof(struct ipv6_hdr);
+ total_length2 =
+ rte_ntohs(hdr2->payload_len) + sizeof(struct ipv6_hdr);
+ total_length3 =
+ rte_ntohs(hdr3->payload_len) + sizeof(struct ipv6_hdr);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_LB)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_LB);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_LB);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_LB);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_LB);
+
+ pkt_work_lb(mbuf0,
+ data0,
+ &cfg->lb);
+
+ pkt_work_lb(mbuf1,
+ data1,
+ &cfg->lb);
+
+ pkt_work_lb(mbuf2,
+ data2,
+ &cfg->lb);
+
+ pkt_work_lb(mbuf3,
+ data3,
+ &cfg->lb);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_MTR);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_MTR);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_MTR);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_MTR);
+
+ drop_mask0 |= pkt_work_mtr(mbuf0,
+ data0,
+ &action->dscp_table,
+ action->mp,
+ time,
+ dscp0,
+ total_length0);
+
+ drop_mask1 |= pkt_work_mtr(mbuf1,
+ data1,
+ &action->dscp_table,
+ action->mp,
+ time,
+ dscp1,
+ total_length1);
+
+ drop_mask2 |= pkt_work_mtr(mbuf2,
+ data2,
+ &action->dscp_table,
+ action->mp,
+ time,
+ dscp2,
+ total_length2);
+
+ drop_mask3 |= pkt_work_mtr(mbuf3,
+ data3,
+ &action->dscp_table,
+ action->mp,
+ time,
+ dscp3,
+ total_length3);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TM)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_TM);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_TM);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_TM);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_TM);
+
+ pkt_work_tm(mbuf0,
+ data0,
+ &action->dscp_table,
+ dscp0);
+
+ pkt_work_tm(mbuf1,
+ data1,
+ &action->dscp_table,
+ dscp1);
+
+ pkt_work_tm(mbuf2,
+ data2,
+ &action->dscp_table,
+ dscp2);
+
+ pkt_work_tm(mbuf3,
+ data3,
+ &action->dscp_table,
+ dscp3);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_ENCAP);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_ENCAP);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_ENCAP);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_ENCAP);
+
+ pkt_work_encap(mbuf0,
+ data0,
+ &cfg->encap,
+ ip0,
+ total_length0,
+ ip_offset);
+
+ pkt_work_encap(mbuf1,
+ data1,
+ &cfg->encap,
+ ip1,
+ total_length1,
+ ip_offset);
+
+ pkt_work_encap(mbuf2,
+ data2,
+ &cfg->encap,
+ ip2,
+ total_length2,
+ ip_offset);
+
+ pkt_work_encap(mbuf3,
+ data3,
+ &cfg->encap,
+ ip3,
+ total_length3,
+ ip_offset);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_NAT)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_NAT);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_NAT);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_NAT);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_NAT);
+
+ if (cfg->common.ip_version) {
+ pkt_ipv4_work_nat(ip0, data0, &cfg->nat);
+ pkt_ipv4_work_nat(ip1, data1, &cfg->nat);
+ pkt_ipv4_work_nat(ip2, data2, &cfg->nat);
+ pkt_ipv4_work_nat(ip3, data3, &cfg->nat);
+ } else {
+ pkt_ipv6_work_nat(ip0, data0, &cfg->nat);
+ pkt_ipv6_work_nat(ip1, data1, &cfg->nat);
+ pkt_ipv6_work_nat(ip2, data2, &cfg->nat);
+ pkt_ipv6_work_nat(ip3, data3, &cfg->nat);
+ }
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TTL)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_TTL);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_TTL);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_TTL);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_TTL);
+
+ if (cfg->common.ip_version) {
+ drop_mask0 |= pkt_ipv4_work_ttl(ip0, data0);
+ drop_mask1 |= pkt_ipv4_work_ttl(ip1, data1);
+ drop_mask2 |= pkt_ipv4_work_ttl(ip2, data2);
+ drop_mask3 |= pkt_ipv4_work_ttl(ip3, data3);
+ } else {
+ drop_mask0 |= pkt_ipv6_work_ttl(ip0, data0);
+ drop_mask1 |= pkt_ipv6_work_ttl(ip1, data1);
+ drop_mask2 |= pkt_ipv6_work_ttl(ip2, data2);
+ drop_mask3 |= pkt_ipv6_work_ttl(ip3, data3);
+ }
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_STATS)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_STATS);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_STATS);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_STATS);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_STATS);
+
+ pkt_work_stats(data0, total_length0);
+ pkt_work_stats(data1, total_length1);
+ pkt_work_stats(data2, total_length2);
+ pkt_work_stats(data3, total_length3);
+ }
+
+ if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TIME)) {
+ void *data0 =
+ action_data_get(table_entry0, action, RTE_TABLE_ACTION_TIME);
+ void *data1 =
+ action_data_get(table_entry1, action, RTE_TABLE_ACTION_TIME);
+ void *data2 =
+ action_data_get(table_entry2, action, RTE_TABLE_ACTION_TIME);
+ void *data3 =
+ action_data_get(table_entry3, action, RTE_TABLE_ACTION_TIME);
+
+ pkt_work_time(data0, time);
+ pkt_work_time(data1, time);
+ pkt_work_time(data2, time);
+ pkt_work_time(data3, time);
+ }
+
+ return drop_mask0 |
+ (drop_mask1 << 1) |
+ (drop_mask2 << 2) |
+ (drop_mask3 << 3);
+}
+
+static __rte_always_inline int
+ah(struct rte_pipeline *p,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_pipeline_table_entry **entries,
+ struct rte_table_action *action,
+ struct ap_config *cfg)
+{
+ uint64_t pkts_drop_mask = 0;
+ uint64_t time = 0;
+
+ if (cfg->action_mask & ((1LLU << RTE_TABLE_ACTION_MTR) |
+ (1LLU << RTE_TABLE_ACTION_TIME)))
+ time = rte_rdtsc();
+
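+ /* Contiguous packet mask (no holes): process the packets in bursts of 4. */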
+ if ((pkts_mask & (pkts_mask + 1)) == 0) {
+ uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+ uint32_t i;
+
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) {
+ uint64_t drop_mask;
+
+ drop_mask = pkt4_work(&pkts[i],
+ &entries[i],
+ time,
+ action,
+ cfg);
+
+ pkts_drop_mask |= drop_mask << i;
+ }
+
+ for ( ; i < n_pkts; i++) {
+ uint64_t drop_mask;
+
+ drop_mask = pkt_work(pkts[i],
+ entries[i],
+ time,
+ action,
+ cfg);
+
+ pkts_drop_mask |= drop_mask << i;
+ }
+ } else
+ for ( ; pkts_mask; ) {
+ uint32_t pos = __builtin_ctzll(pkts_mask);
+ uint64_t pkt_mask = 1LLU << pos;
+ uint64_t drop_mask;
+
+ drop_mask = pkt_work(pkts[pos],
+ entries[pos],
+ time,
+ action,
+ cfg);
+
+ pkts_mask &= ~pkt_mask;
+ pkts_drop_mask |= drop_mask << pos;
+ }
+
+ rte_pipeline_ah_packet_drop(p, pkts_drop_mask);
+
+ return 0;
+}
+
+static int
+ah_default(struct rte_pipeline *p,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_pipeline_table_entry **entries,
+ void *arg)
+{
+ struct rte_table_action *action = arg;
+
+ return ah(p,
+ pkts,
+ pkts_mask,
+ entries,
+ action,
+ &action->cfg);
+}
+
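+/*
+ * When forwarding is the only action enabled, no hit action handler is needed,
+ * since packet forwarding is performed natively by the pipeline table entry.
+ */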
+static rte_pipeline_table_action_handler_hit
+ah_selector(struct rte_table_action *action)
+{
+ if (action->cfg.action_mask == (1LLU << RTE_TABLE_ACTION_FWD))
+ return NULL;
+
+ return ah_default;
+}
+
+int
+rte_table_action_table_params_get(struct rte_table_action *action,
+ struct rte_pipeline_table_params *params)
+{
+ rte_pipeline_table_action_handler_hit f_action_hit;
+ uint32_t total_size;
+
+ /* Check input arguments */
+ if ((action == NULL) ||
+ (params == NULL))
+ return -EINVAL;
+
+ f_action_hit = ah_selector(action);
+ total_size = rte_align32pow2(action->data.total_size);
+
+ /* Fill in params */
+ params->f_action_hit = f_action_hit;
+ params->f_action_miss = NULL;
+ params->arg_ah = (f_action_hit) ? action : NULL;
+ params->action_data_size = total_size -
+ sizeof(struct rte_pipeline_table_entry);
+
+ return 0;
+}
+
+int
+rte_table_action_free(struct rte_table_action *action)
+{
+ if (action == NULL)
+ return 0;
+
+ rte_free(action);
+
+ return 0;
+}
diff --git a/lib/librte_pipeline/rte_table_action.h b/lib/librte_pipeline/rte_table_action.h
new file mode 100644
index 00000000..c7f751aa
--- /dev/null
+++ b/lib/librte_pipeline/rte_table_action.h
@@ -0,0 +1,905 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_ACTION_H__
+#define __INCLUDE_RTE_TABLE_ACTION_H__
+
+/**
+ * @file
+ * RTE Pipeline Table Actions
+ *
+ * This API provides a common set of actions for pipeline tables to speed up
+ * application development.
+ *
+ * Each match-action rule added to a pipeline table has associated data that
+ * stores the action context. This data is input to the table action handler
+ * called for every input packet that hits the rule as part of the table lookup
+ * during the pipeline execution. The pipeline library allows the user to define
+ * their own table actions by providing customized table action handlers (table
+ * lookup) and complete freedom in setting the rules and their data (table rule
+ * add/delete). While the user can still follow this process, this API is
+ * intended to provide a quicker development alternative for a set of predefined
+ * actions.
+ *
+ * The typical steps to use this API (also illustrated by the usage sketch that
+ * follows this comment block) are:
+ * - Define a table action profile. This is a configuration template that can
+ * potentially be shared by multiple tables from the same or different
+ * pipelines, with different tables from the same pipeline likely to use
+ * different action profiles. For every table using a given action profile,
+ * the profile defines the set of actions and the action configuration to be
+ * implemented for all the table rules. API functions:
+ * rte_table_action_profile_create(),
+ * rte_table_action_profile_action_register(),
+ * rte_table_action_profile_freeze().
+ *
+ * - Instantiate the table action profile to create table action objects. Each
+ * pipeline table has its own table action object. API functions:
+ * rte_table_action_create().
+ *
+ * - Use the table action object to generate the pipeline table action handlers
+ * (invoked by the pipeline table lookup operation). API functions:
+ * rte_table_action_table_params_get().
+ *
+ * - Use the table action object to generate the rule data (for the pipeline
+ * table rule add operation) based on given action parameters. API functions:
+ * rte_table_action_apply().
+ *
+ * - Use the table action object to read action data (e.g. stats counters) for
+ * any given rule. API functions: rte_table_action_XYZ_read().
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
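+
+/*
+ * Minimal usage sketch of the steps above. This is a non-authoritative example:
+ * the IDs, offsets and configuration values are illustrative assumptions and
+ * error handling is omitted.
+ *
+ *	struct rte_table_action_common_config common = {
+ *		.ip_version = 1, // IPv4
+ *		.ip_offset = sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM +
+ *			sizeof(struct ether_hdr),
+ *	};
+ *	struct rte_table_action_stats_config stats_config = {
+ *		.n_packets_enabled = 1,
+ *		.n_bytes_enabled = 1,
+ *	};
+ *	struct rte_table_action_profile *profile;
+ *	struct rte_table_action *a;
+ *	struct rte_pipeline_table_params params;
+ *
+ *	profile = rte_table_action_profile_create(&common);
+ *	rte_table_action_profile_action_register(profile,
+ *		RTE_TABLE_ACTION_STATS, &stats_config);
+ *	rte_table_action_profile_freeze(profile);
+ *
+ *	a = rte_table_action_create(profile, 0); // socket ID 0
+ *	rte_table_action_table_params_get(a, &params);
+ *	// params (completed with the table ops and key fields by the caller)
+ *	// is then used to create the pipeline table.
+ *
+ *	// Per table rule: fill in the rule action data before adding the rule.
+ *	struct rte_table_action_fwd_params fwd = {
+ *		.action = RTE_PIPELINE_ACTION_PORT,
+ *		.id = 0, // output port ID (illustrative)
+ *	};
+ *	// rule_data points to the rule action data area (typically the pipeline
+ *	// table entry), sized according to params.action_data_size:
+ *	rte_table_action_apply(a, rule_data, RTE_TABLE_ACTION_FWD, &fwd);
+ */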
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+#include <rte_ether.h>
+#include <rte_meter.h>
+#include <rte_table_hash.h>
+
+#include "rte_pipeline.h"
+
+/** Table actions. */
+enum rte_table_action_type {
+ /** Forward to next pipeline table, output port or drop. */
+ RTE_TABLE_ACTION_FWD = 0,
+
+ /** Load balance. */
+ RTE_TABLE_ACTION_LB,
+
+ /** Traffic Metering and Policing. */
+ RTE_TABLE_ACTION_MTR,
+
+ /** Traffic Management. */
+ RTE_TABLE_ACTION_TM,
+
+ /** Packet encapsulations. */
+ RTE_TABLE_ACTION_ENCAP,
+
+ /** Network Address Translation (NAT). */
+ RTE_TABLE_ACTION_NAT,
+
+ /** Time to Live (TTL) update. */
+ RTE_TABLE_ACTION_TTL,
+
+ /** Statistics. */
+ RTE_TABLE_ACTION_STATS,
+
+ /** Timestamp. */
+ RTE_TABLE_ACTION_TIME,
+};
+
+/** Common action configuration (per table action profile). */
+struct rte_table_action_common_config {
+ /** Input packet Internet Protocol (IP) version. Non-zero for IPv4, zero
+ * for IPv6.
+ */
+ int ip_version;
+
+ /** IP header offset within the input packet buffer. Offset 0 points to
+ * the first byte of the MBUF structure.
+ */
+ uint32_t ip_offset;
+};
+
+/**
+ * RTE_TABLE_ACTION_FWD
+ */
+/** Forward action parameters (per table rule). */
+struct rte_table_action_fwd_params {
+ /** Forward action. */
+ enum rte_pipeline_action action;
+
+ /** Pipeline table ID or output port ID. */
+ uint32_t id;
+};
+
+/**
+ * RTE_TABLE_ACTION_LB
+ */
+/** Load balance key size min (number of bytes). */
+#define RTE_TABLE_ACTION_LB_KEY_SIZE_MIN 8
+
+/** Load balance key size max (number of bytes). */
+#define RTE_TABLE_ACTION_LB_KEY_SIZE_MAX 64
+
+/** Load balance table size. */
+#define RTE_TABLE_ACTION_LB_TABLE_SIZE 8
+
+/** Load balance action configuration (per table action profile). */
+struct rte_table_action_lb_config {
+ /** Key size (number of bytes). */
+ uint32_t key_size;
+
+ /** Key offset within the input packet buffer. Offset 0 points to the
+ * first byte of the MBUF structure.
+ */
+ uint32_t key_offset;
+
+ /** Key mask (*key_size* bytes are valid). */
+ uint8_t key_mask[RTE_TABLE_ACTION_LB_KEY_SIZE_MAX];
+
+ /** Hash function. */
+ rte_table_hash_op_hash f_hash;
+
+ /** Seed value for *f_hash*. */
+ uint64_t seed;
+
+ /** Output value offset within the input packet buffer. Offset 0 points
+ * to the first byte of the MBUF structure.
+ */
+ uint32_t out_offset;
+};
+
+/** Load balance action parameters (per table rule). */
+struct rte_table_action_lb_params {
+ /** Table defining the output values and their weights. The weights are
+ * set in 1/RTE_TABLE_ACTION_LB_TABLE_SIZE increments. To assign a
+ * weight of N/RTE_TABLE_ACTION_LB_TABLE_SIZE to a given output value
+ * (0 <= N <= RTE_TABLE_ACTION_LB_TABLE_SIZE), the same output value
+ * needs to show up exactly N times in this table.
+ */
+ uint32_t out[RTE_TABLE_ACTION_LB_TABLE_SIZE];
+};
+
+/**
+ * RTE_TABLE_ACTION_MTR
+ */
+/** Max number of traffic classes (TCs). */
+#define RTE_TABLE_ACTION_TC_MAX 4
+
+/** Max number of queues per traffic class. */
+#define RTE_TABLE_ACTION_TC_QUEUE_MAX 4
+
+/** Differentiated Services Code Point (DSCP) translation table entry. */
+struct rte_table_action_dscp_table_entry {
+ /** Traffic class. Used by the meter or the traffic management actions.
+ * Has to be strictly smaller than *RTE_TABLE_ACTION_TC_MAX*. Traffic
+ * class 0 is the highest priority.
+ */
+ uint32_t tc_id;
+
+ /** Traffic class queue. Used by the traffic management action. Has to
+ * be strictly smaller than *RTE_TABLE_ACTION_TC_QUEUE_MAX*.
+ */
+ uint32_t tc_queue_id;
+
+ /** Packet color. Used by the meter action as the packet input color
+ * for the color aware mode of the traffic metering algorithm.
+ */
+ enum rte_meter_color color;
+};
+
+/** DSCP translation table. */
+struct rte_table_action_dscp_table {
+ /** Array of DSCP table entries */
+ struct rte_table_action_dscp_table_entry entry[64];
+};
+
+/** Supported traffic metering algorithms. */
+enum rte_table_action_meter_algorithm {
+ /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */
+ RTE_TABLE_ACTION_METER_SRTCM,
+
+ /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */
+ RTE_TABLE_ACTION_METER_TRTCM,
+};
+
+/** Traffic metering profile (configuration template). */
+struct rte_table_action_meter_profile {
+ /** Traffic metering algorithm. */
+ enum rte_table_action_meter_algorithm alg;
+
+ RTE_STD_C11
+ union {
+ /** Only valid when *alg* is set to srTCM - IETF RFC 2697. */
+ struct rte_meter_srtcm_params srtcm;
+
+ /** Only valid when *alg* is set to trTCM - IETF RFC 2698. */
+ struct rte_meter_trtcm_params trtcm;
+ };
+};
+
+/** Policer actions. */
+enum rte_table_action_policer {
+ /** Recolor the packet as green. */
+ RTE_TABLE_ACTION_POLICER_COLOR_GREEN = 0,
+
+ /** Recolor the packet as yellow. */
+ RTE_TABLE_ACTION_POLICER_COLOR_YELLOW,
+
+ /** Recolor the packet as red. */
+ RTE_TABLE_ACTION_POLICER_COLOR_RED,
+
+ /** Drop the packet. */
+ RTE_TABLE_ACTION_POLICER_DROP,
+
+ /** Number of policer actions. */
+ RTE_TABLE_ACTION_POLICER_MAX
+};
+
+/** Meter action configuration per traffic class. */
+struct rte_table_action_mtr_tc_params {
+ /** Meter profile ID. */
+ uint32_t meter_profile_id;
+
+ /** Policer actions. */
+ enum rte_table_action_policer policer[e_RTE_METER_COLORS];
+};
+
+/** Meter action statistics counters per traffic class. */
+struct rte_table_action_mtr_counters_tc {
+ /** Number of packets per color at the output of the traffic metering
+ * and before the policer actions are executed. Only valid when
+ * *n_packets_valid* is non-zero.
+ */
+ uint64_t n_packets[e_RTE_METER_COLORS];
+
+ /** Number of packet bytes per color at the output of the traffic
+ * metering and before the policer actions are executed. Only valid when
+ * *n_bytes_valid* is non-zero.
+ */
+ uint64_t n_bytes[e_RTE_METER_COLORS];
+
+ /** When non-zero, the *n_packets* field is valid. */
+ int n_packets_valid;
+
+ /** When non-zero, the *n_bytes* field is valid. */
+ int n_bytes_valid;
+};
+
+/** Meter action configuration (per table action profile). */
+struct rte_table_action_mtr_config {
+ /** Meter algorithm. */
+ enum rte_table_action_meter_algorithm alg;
+
+ /** Number of traffic classes. Each traffic class has its own traffic
+ * meter and policer instances. Needs to be equal to either 1 or to
+ * *RTE_TABLE_ACTION_TC_MAX*.
+ */
+ uint32_t n_tc;
+
+ /** When non-zero, the *n_packets* meter stats counter is enabled,
+ * otherwise it is disabled.
+ *
+ * @see struct rte_table_action_mtr_counters_tc
+ */
+ int n_packets_enabled;
+
+ /** When non-zero, the *n_bytes* meter stats counter is enabled,
+ * otherwise it is disabled.
+ *
+ * @see struct rte_table_action_mtr_counters_tc
+ */
+ int n_bytes_enabled;
+};
+
+/** Meter action parameters (per table rule). */
+struct rte_table_action_mtr_params {
+ /** Traffic meter and policer parameters for each of the *tc_mask*
+ * traffic classes.
+ */
+ struct rte_table_action_mtr_tc_params mtr[RTE_TABLE_ACTION_TC_MAX];
+
+ /** Bit mask defining which traffic class parameters are valid in *mtr*.
+ * If bit N is set in *tc_mask*, then parameters for traffic class N are
+ * valid in *mtr*.
+ */
+ uint32_t tc_mask;
+};
+
+/** Meter action statistics counters (per table rule). */
+struct rte_table_action_mtr_counters {
+ /** Stats counters for each of the *tc_mask* traffic classes. */
+ struct rte_table_action_mtr_counters_tc stats[RTE_TABLE_ACTION_TC_MAX];
+
+ /** Bit mask defining which traffic class counters are valid in *stats*.
+ * If bit N is set in *tc_mask*, then the counters for traffic class N are
+ * valid in *stats*.
+ */
+ uint32_t tc_mask;
+};
+
+/**
+ * RTE_TABLE_ACTION_TM
+ */
+/** Traffic management action configuration (per table action profile). */
+struct rte_table_action_tm_config {
+ /** Number of subports per port. */
+ uint32_t n_subports_per_port;
+
+ /** Number of pipes per subport. */
+ uint32_t n_pipes_per_subport;
+};
+
+/** Traffic management action parameters (per table rule). */
+struct rte_table_action_tm_params {
+ /** Subport ID. */
+ uint32_t subport_id;
+
+ /** Pipe ID. */
+ uint32_t pipe_id;
+};
+
+/**
+ * RTE_TABLE_ACTION_ENCAP
+ */
+/** Supported packet encapsulation types. */
+enum rte_table_action_encap_type {
+ /** IP -> { Ether | IP } */
+ RTE_TABLE_ACTION_ENCAP_ETHER = 0,
+
+ /** IP -> { Ether | VLAN | IP } */
+ RTE_TABLE_ACTION_ENCAP_VLAN,
+
+ /** IP -> { Ether | S-VLAN | C-VLAN | IP } */
+ RTE_TABLE_ACTION_ENCAP_QINQ,
+
+ /** IP -> { Ether | MPLS | IP } */
+ RTE_TABLE_ACTION_ENCAP_MPLS,
+
+ /** IP -> { Ether | PPPoE | PPP | IP } */
+ RTE_TABLE_ACTION_ENCAP_PPPOE,
+};
+
+/** Pre-computed Ethernet header fields for encapsulation action. */
+struct rte_table_action_ether_hdr {
+ struct ether_addr da; /**< Destination address. */
+ struct ether_addr sa; /**< Source address. */
+};
+
+/** Pre-computed VLAN header fields for encapsulation action. */
+struct rte_table_action_vlan_hdr {
+ uint8_t pcp; /**< Priority Code Point (PCP). */
+ uint8_t dei; /**< Drop Eligibility Indicator (DEI). */
+ uint16_t vid; /**< VLAN Identifier (VID). */
+};
+
+/** Pre-computed MPLS header fields for encapsulation action. */
+struct rte_table_action_mpls_hdr {
+ uint32_t label; /**< Label. */
+ uint8_t tc; /**< Traffic Class (TC). */
+ uint8_t ttl; /**< Time to Live (TTL). */
+};
+
+/** Pre-computed PPPoE header fields for encapsulation action. */
+struct rte_table_action_pppoe_hdr {
+ uint16_t session_id; /**< Session ID. */
+};
+
+/** Ether encap parameters. */
+struct rte_table_action_encap_ether_params {
+ struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
+};
+
+/** VLAN encap parameters. */
+struct rte_table_action_encap_vlan_params {
+ struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
+ struct rte_table_action_vlan_hdr vlan; /**< VLAN header. */
+};
+
+/** QinQ encap parameters. */
+struct rte_table_action_encap_qinq_params {
+ struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
+ struct rte_table_action_vlan_hdr svlan; /**< Service VLAN header. */
+ struct rte_table_action_vlan_hdr cvlan; /**< Customer VLAN header. */
+};
+
+/** Max number of MPLS labels per output packet for MPLS encapsulation. */
+#ifndef RTE_TABLE_ACTION_MPLS_LABELS_MAX
+#define RTE_TABLE_ACTION_MPLS_LABELS_MAX 4
+#endif
+
+/** MPLS encap parameters. */
+struct rte_table_action_encap_mpls_params {
+ /** Ethernet header. */
+ struct rte_table_action_ether_hdr ether;
+
+ /** MPLS header. */
+ struct rte_table_action_mpls_hdr mpls[RTE_TABLE_ACTION_MPLS_LABELS_MAX];
+
+ /** Number of MPLS labels in MPLS header. */
+ uint32_t mpls_count;
+
+ /** Non-zero for MPLS unicast, zero for MPLS multicast. */
+ int unicast;
+};
+
+/** PPPoE encap parameters. */
+struct rte_table_action_encap_pppoe_params {
+ struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
+ struct rte_table_action_pppoe_hdr pppoe; /**< PPPoE/PPP headers. */
+};
+
+/** Encap action configuration (per table action profile). */
+struct rte_table_action_encap_config {
+ /** Bit mask defining the set of packet encapsulations enabled for the
+ * current table action profile. If bit N is set in *encap_mask*,
+ * then packet encapsulation N is enabled, otherwise it is disabled.
+ *
+ * @see enum rte_table_action_encap_type
+ */
+ uint64_t encap_mask;
+};
+
+/** Encap action parameters (per table rule). */
+struct rte_table_action_encap_params {
+ /** Encapsulation type. */
+ enum rte_table_action_encap_type type;
+
+ RTE_STD_C11
+ union {
+ /** Only valid when *type* is set to Ether. */
+ struct rte_table_action_encap_ether_params ether;
+
+ /** Only valid when *type* is set to VLAN. */
+ struct rte_table_action_encap_vlan_params vlan;
+
+ /** Only valid when *type* is set to QinQ. */
+ struct rte_table_action_encap_qinq_params qinq;
+
+ /** Only valid when *type* is set to MPLS. */
+ struct rte_table_action_encap_mpls_params mpls;
+
+ /** Only valid when *type* is set to PPPoE. */
+ struct rte_table_action_encap_pppoe_params pppoe;
+ };
+};
+
+/**
+ * RTE_TABLE_ACTION_NAT
+ */
+/** NAT action configuration (per table action profile). */
+struct rte_table_action_nat_config {
+ /** When non-zero, the IP source address and L4 protocol source port are
+ * translated. When zero, the IP destination address and L4 protocol
+ * destination port are translated.
+ */
+ int source_nat;
+
+ /** Layer 4 protocol, for example TCP (0x06) or UDP (0x11). The checksum
+ * field is computed differently and placed at a different header offset
+ * by each layer 4 protocol.
+ */
+ uint8_t proto;
+};
+
+/** NAT action parameters (per table rule). */
+struct rte_table_action_nat_params {
+ /** IP version for *addr*: non-zero for IPv4, zero for IPv6. */
+ int ip_version;
+
+ /** IP address. */
+ union {
+ /** IPv4 address; only valid when *ip_version* is non-zero. */
+ uint32_t ipv4;
+
+ /** IPv6 address; only valid when *ip_version* is zero. */
+ uint8_t ipv6[16];
+ } addr;
+
+ /** Port. */
+ uint16_t port;
+};
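+
+/*
+ * Example with illustrative values (not part of this API definition): source
+ * NAT for TCP traffic, rewriting the source IPv4 address and source port of
+ * matching packets to 198.51.100.10:4000. The IPv4() helper is assumed to come
+ * from rte_ip.h.
+ *
+ *	struct rte_table_action_nat_config nat_config = {
+ *		.source_nat = 1,
+ *		.proto = 0x06, // TCP
+ *	};
+ *	struct rte_table_action_nat_params nat_params = {
+ *		.ip_version = 1,
+ *		.addr.ipv4 = IPv4(198, 51, 100, 10),
+ *		.port = 4000,
+ *	};
+ */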
+
+/**
+ * RTE_TABLE_ACTION_TTL
+ */
+/** TTL action configuration (per table action profile). */
+struct rte_table_action_ttl_config {
+ /** When non-zero, the input packets whose updated IPv4 Time to Live
+ * (TTL) field or IPv6 Hop Limit (HL) field is zero are dropped.
+ * When zero, the input packets whose updated IPv4 TTL field or IPv6 HL
+ * field is zero are forwarded as usual (typically for debugging
+ * purpose).
+ */
+ int drop;
+
+ /** When non-zero, the *n_packets* stats counter for TTL action is
+ * enabled, otherwise disabled.
+ *
+ * @see struct rte_table_action_ttl_counters
+ */
+ int n_packets_enabled;
+};
+
+/** TTL action parameters (per table rule). */
+struct rte_table_action_ttl_params {
+ /** When non-zero, decrement the IPv4 TTL field and update the checksum
+ * field, or decrement the IPv6 HL field. When zero, the IPv4 TTL field
+ * or the IPv6 HL field is not changed.
+ */
+ int decrement;
+};
+
+/** TTL action statistics packets (per table rule). */
+struct rte_table_action_ttl_counters {
+ /** Number of IPv4 packets whose updated TTL field is zero or IPv6
+ * packets whose updated HL field is zero.
+ */
+ uint64_t n_packets;
+};
+
+/**
+ * RTE_TABLE_ACTION_STATS
+ */
+/** Stats action configuration (per table action profile). */
+struct rte_table_action_stats_config {
+ /** When non-zero, the *n_packets* stats counter is enabled, otherwise
+ * disabled.
+ *
+ * @see struct rte_table_action_stats_counters
+ */
+ int n_packets_enabled;
+
+ /** When non-zero, the *n_bytes* stats counter is enabled, otherwise
+ * disabled.
+ *
+ * @see struct rte_table_action_stats_counters
+ */
+ int n_bytes_enabled;
+};
+
+/** Stats action parameters (per table rule). */
+struct rte_table_action_stats_params {
+ /** Initial value for the *n_packets* stats counter. Typically set to 0.
+ *
+ * @see struct rte_table_action_stats_counters
+ */
+ uint64_t n_packets;
+
+ /** Initial value for the *n_bytes* stats counter. Typically set to 0.
+ *
+ * @see struct rte_table_action_stats_counters
+ */
+ uint64_t n_bytes;
+};
+
+/** Stats action counters (per table rule). */
+struct rte_table_action_stats_counters {
+ /** Number of packets. Valid only when *n_packets_valid* is non-zero. */
+ uint64_t n_packets;
+
+ /** Number of bytes. Valid only when *n_bytes_valid* is non-zero. */
+ uint64_t n_bytes;
+
+ /** When non-zero, the *n_packets* field is valid, otherwise invalid. */
+ int n_packets_valid;
+
+ /** When non-zero, the *n_bytes* field is valid, otherwise invalid. */
+ int n_bytes_valid;
+};
+
+/**
+ * RTE_TABLE_ACTION_TIME
+ */
+/** Timestamp action parameters (per table rule). */
+struct rte_table_action_time_params {
+ /** Initial timestamp value. Typically set to current time. */
+ uint64_t time;
+};
+
+/**
+ * Table action profile.
+ */
+struct rte_table_action_profile;
+
+/**
+ * Table action profile create.
+ *
+ * @param[in] common
+ * Common action configuration.
+ * @return
+ * Table action profile handle on success, NULL otherwise.
+ */
+struct rte_table_action_profile * __rte_experimental
+rte_table_action_profile_create(struct rte_table_action_common_config *common);
+
+/**
+ * Table action profile free.
+ *
+ * @param[in] profile
+ * Table profile action handle (needs to be valid).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_profile_free(struct rte_table_action_profile *profile);
+
+/**
+ * Table action profile action register.
+ *
+ * @param[in] profile
+ * Table profile action handle (needs to be valid and not in frozen state).
+ * @param[in] type
+ * Specific table action to be registered for *profile*.
+ * @param[in] action_config
+ * Configuration for the *type* action.
+ * If struct rte_table_action_*type*_config is defined by the Table Action
+ * API, it needs to point to a valid instance of this structure, otherwise it
+ * needs to be set to NULL.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_profile_action_register(struct rte_table_action_profile *profile,
+ enum rte_table_action_type type,
+ void *action_config);
+
+/**
+ * Table action profile freeze.
+ *
+ * Once this function is called successfully, the given profile enters the
+ * frozen state with the following immediate effects: no more actions can be
+ * registered for this profile, and the profile can now be instantiated to
+ * create table action objects.
+ *
+ * @param[in] profile
+ * Table profile action handle (needs to be valid and not in frozen state).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ *
+ * @see rte_table_action_create()
+ */
+int __rte_experimental
+rte_table_action_profile_freeze(struct rte_table_action_profile *profile);
+
+/**
+ * Table action.
+ */
+struct rte_table_action;
+
+/**
+ * Table action create.
+ *
+ * Instantiates the given table action profile to create a table action object.
+ *
+ * @param[in] profile
+ * Table profile action handle (needs to be valid and in frozen state).
+ * @param[in] socket_id
+ * CPU socket ID where the internal data structures required by the new table
+ * action object should be allocated.
+ * @return
+ * Handle to table action object on success, NULL on error.
+ *
+ * @see rte_table_action_profile_freeze()
+ */
+struct rte_table_action * __rte_experimental
+rte_table_action_create(struct rte_table_action_profile *profile,
+ uint32_t socket_id);
+
+/**
+ * Table action free.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_free(struct rte_table_action *action);
+
+/**
+ * Table action table params get.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[inout] params
+ * Pipeline table parameters (needs to be pre-allocated).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_table_params_get(struct rte_table_action *action,
+ struct rte_pipeline_table_params *params);
+
+/**
+ * Table action apply.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] data
+ * Data byte array (typically table rule data) to apply action *type* on.
+ * @param[in] type
+ * Specific table action previously registered for the table action profile of
+ * the *action* object.
+ * @param[in] action_params
+ * Parameters for the *type* action.
+ * If struct rte_table_action_*type*_params is defined by the Table Action
+ * API, it needs to point to a valid instance of this structure, otherwise it
+ * needs to be set to NULL.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_apply(struct rte_table_action *action,
+ void *data,
+ enum rte_table_action_type type,
+ void *action_params);
+
+/**
+ * Table action DSCP table update.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] dscp_mask
+ * 64-bit mask defining the DSCP table entries to be updated. If bit N is set
+ * in this bit mask, then DSCP table entry N is to be updated, otherwise not.
+ * @param[in] table
+ * DSCP table.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_dscp_table_update(struct rte_table_action *action,
+ uint64_t dscp_mask,
+ struct rte_table_action_dscp_table *table);
+
+/**
+ * Table action meter profile add.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] meter_profile_id
+ * Meter profile ID to be used for the *profile* once it is successfully added
+ * to the *action* object (needs to be unused by the set of meter profiles
+ * currently registered for the *action* object).
+ * @param[in] profile
+ * Meter profile to be added.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_meter_profile_add(struct rte_table_action *action,
+ uint32_t meter_profile_id,
+ struct rte_table_action_meter_profile *profile);
+
+/**
+ * Table action meter profile delete.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] meter_profile_id
+ * Meter profile ID of the meter profile to be deleted from the *action*
+ * object (needs to be valid for the *action* object).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_meter_profile_delete(struct rte_table_action *action,
+ uint32_t meter_profile_id);
+
+/**
+ * Table action meter read.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] data
+ * Data byte array (typically table rule data) with meter action previously
+ * applied on it.
+ * @param[in] tc_mask
+ * Bit mask defining which traffic classes should have the meter stats
+ * counters read from *data* and stored into *stats*. If bit N is set in this
+ * bit mask, then traffic class N is part of this operation, otherwise it is
+ * not. If bit N is set in this bit mask, then traffic class N must be one of
+ * the traffic classes that are enabled for the meter action in the table
+ * action profile used by the *action* object.
+ * @param[inout] stats
+ * When non-NULL, it points to the area where the meter stats counters read
+ * from *data* are saved. Only the meter stats counters for the *tc_mask*
+ * traffic classes are read and stored to *stats*.
+ * @param[in] clear
+ * When non-zero, the meter stats counters are cleared (i.e. set to zero),
+ * otherwise the counters are not modified. When the read operation is enabled
+ * (*stats* is non-NULL), the clear operation is performed after the read
+ * operation is completed.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_meter_read(struct rte_table_action *action,
+ void *data,
+ uint32_t tc_mask,
+ struct rte_table_action_mtr_counters *stats,
+ int clear);
+
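For illustration only, reading and then clearing the meter counters of traffic class 0 for a rule whose action data lives in *rule_data* might look like this (the *action* handle and *rule_data* are assumptions):

    /* Sketch: read and then clear the meter counters of traffic class 0. */
    struct rte_table_action_mtr_counters counters;
    int status = rte_table_action_meter_read(action, rule_data,
        UINT32_C(1) << 0,  /* tc_mask: traffic class 0 only */
        &counters,         /* counters read from rule_data end up here */
        1);                /* clear the counters after reading */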
+/**
+ * Table action TTL read.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] data
+ * Data byte array (typically table rule data) with TTL action previously
+ * applied on it.
+ * @param[inout] stats
+ * When non-NULL, it points to the area where the TTL stats counters read from
+ * *data* are saved.
+ * @param[in] clear
+ * When non-zero, the TTL stats counters are cleared (i.e. set to zero),
+ * otherwise the counters are not modified. When the read operation is enabled
+ * (*stats* is non-NULL), the clear operation is performed after the read
+ * operation is completed.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_ttl_read(struct rte_table_action *action,
+ void *data,
+ struct rte_table_action_ttl_counters *stats,
+ int clear);
+
+/**
+ * Table action stats read.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] data
+ * Data byte array (typically table rule data) with stats action previously
+ * applied on it.
+ * @param[inout] stats
+ * When non-NULL, it points to the area where the stats counters read from
+ * *data* are saved.
+ * @param[in] clear
+ * When non-zero, the stats counters are cleared (i.e. set to zero), otherwise
+ * the counters are not modified. When the read operation is enabled (*stats*
+ * is non-NULL), the clear operation is performed after the read operation is
+ * completed.
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_stats_read(struct rte_table_action *action,
+ void *data,
+ struct rte_table_action_stats_counters *stats,
+ int clear);
+
+/**
+ * Table action timestamp read.
+ *
+ * @param[in] action
+ * Handle to table action object (needs to be valid).
+ * @param[in] data
+ * Data byte array (typically table rule data) with timestamp action
+ * previously applied on it.
+ * @param[inout] timestamp
+ * Pre-allocated memory where the timestamp read from *data* is saved (has to
+ * be non-NULL).
+ * @return
+ * Zero on success, non-zero error code otherwise.
+ */
+int __rte_experimental
+rte_table_action_time_read(struct rte_table_action *action,
+ void *data,
+ uint64_t *timestamp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_TABLE_ACTION_H__ */
diff --git a/lib/librte_port/meson.build b/lib/librte_port/meson.build
index debb5eb9..f3d8b443 100644
--- a/lib/librte_port/meson.build
+++ b/lib/librte_port/meson.build
@@ -5,23 +5,21 @@ version = 3
sources = files(
'rte_port_ethdev.c',
'rte_port_fd.c',
+ 'rte_port_frag.c',
+ 'rte_port_ras.c',
'rte_port_ring.c',
'rte_port_sched.c',
'rte_port_source_sink.c')
headers = files(
'rte_port_ethdev.h',
'rte_port_fd.h',
+ 'rte_port_frag.h',
+ 'rte_port_ras.h',
'rte_port.h',
'rte_port_ring.h',
'rte_port_sched.h',
'rte_port_source_sink.h')
-deps += ['ethdev', 'sched']
-
-if dpdk_conf.has('RTE_LIBRTE_IP_FRAG')
- sources += files('rte_port_frag.c', 'rte_port_ras.c')
- headers += files('rte_port_frag.h', 'rte_port_ras.h')
- deps += ['ip_frag']
-endif
+deps += ['ethdev', 'sched', 'ip_frag']
if dpdk_conf.has('RTE_LIBRTE_KNI')
sources += files('rte_port_kni.c')
diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h
index 5e8b4ab5..ee638eef 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -48,7 +48,8 @@ enum workload {HIGH, MEDIUM, LOW};
enum policy_to_use {
TRAFFIC,
TIME,
- WORKLOAD
+ WORKLOAD,
+ BRANCH_RATIO
};
struct traffic {
diff --git a/lib/librte_power/power_acpi_cpufreq.c b/lib/librte_power/power_acpi_cpufreq.c
index bce933e9..cd5978d5 100644
--- a/lib/librte_power/power_acpi_cpufreq.c
+++ b/lib/librte_power/power_acpi_cpufreq.c
@@ -623,3 +623,24 @@ power_acpi_disable_turbo(unsigned int lcore_id)
return 0;
}
+
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+ if (caps == NULL) {
+ RTE_LOG(ERR, POWER, "Invalid argument\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+ caps->capabilities = 0;
+ caps->turbo = !!(pi->turbo_available);
+
+ return 0;
+}
diff --git a/lib/librte_power/power_acpi_cpufreq.h b/lib/librte_power/power_acpi_cpufreq.h
index edeeb27a..1af74160 100644
--- a/lib/librte_power/power_acpi_cpufreq.h
+++ b/lib/librte_power/power_acpi_cpufreq.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -196,6 +197,21 @@ int power_acpi_enable_turbo(unsigned int lcore_id);
*/
int power_acpi_disable_turbo(unsigned int lcore_id);
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/power_kvm_vm.c b/lib/librte_power/power_kvm_vm.c
index 38e9066f..20659b72 100644
--- a/lib/librte_power/power_kvm_vm.c
+++ b/lib/librte_power/power_kvm_vm.c
@@ -124,3 +124,11 @@ power_kvm_vm_disable_turbo(unsigned int lcore_id)
{
return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO);
}
+
+struct rte_power_core_capabilities;
+int power_kvm_vm_get_capabilities(__rte_unused unsigned int lcore_id,
+ __rte_unused struct rte_power_core_capabilities *caps)
+{
+ RTE_LOG(ERR, POWER, "rte_power_get_capabilities is not implemented for Virtual Machine Power Management\n");
+ return -ENOTSUP;
+}
diff --git a/lib/librte_power/power_kvm_vm.h b/lib/librte_power/power_kvm_vm.h
index 446d6997..94d4aa12 100644
--- a/lib/librte_power/power_kvm_vm.h
+++ b/lib/librte_power/power_kvm_vm.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -177,6 +178,22 @@ int power_kvm_vm_enable_turbo(unsigned int lcore_id);
* - Negative on error.
*/
int power_kvm_vm_disable_turbo(unsigned int lcore_id);
+
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_kvm_vm_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
index 6c8fb403..208b7919 100644
--- a/lib/librte_power/rte_power.c
+++ b/lib/librte_power/rte_power.c
@@ -24,6 +24,7 @@ rte_power_freq_change_t rte_power_freq_min = NULL;
rte_power_freq_change_t rte_power_turbo_status;
rte_power_freq_change_t rte_power_freq_enable_turbo;
rte_power_freq_change_t rte_power_freq_disable_turbo;
+rte_power_get_capabilities_t rte_power_get_capabilities;
int
rte_power_set_env(enum power_management_env env)
@@ -42,6 +43,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_acpi_turbo_status;
rte_power_freq_enable_turbo = power_acpi_enable_turbo;
rte_power_freq_disable_turbo = power_acpi_disable_turbo;
+ rte_power_get_capabilities = power_acpi_get_capabilities;
} else if (env == PM_ENV_KVM_VM) {
rte_power_freqs = power_kvm_vm_freqs;
rte_power_get_freq = power_kvm_vm_get_freq;
@@ -53,6 +55,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_kvm_vm_turbo_status;
rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo;
rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo;
+ rte_power_get_capabilities = power_kvm_vm_get_capabilities;
} else {
RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
env);
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
index b4b7357b..d70bc0b3 100644
--- a/lib/librte_power/rte_power.h
+++ b/lib/librte_power/rte_power.h
@@ -247,6 +247,38 @@ extern rte_power_freq_change_t rte_power_freq_enable_turbo;
*/
extern rte_power_freq_change_t rte_power_freq_disable_turbo;
+/**
+ * Power capabilities summary.
+ */
+struct rte_power_core_capabilities {
+ RTE_STD_C11
+ union {
+ uint64_t capabilities;
+ RTE_STD_C11
+ struct {
+ uint64_t turbo:1; /**< Turbo can be enabled. */
+ };
+ };
+};
+
+/**
+ * Returns power capabilities for a specific lcore.
+ * Function pointer definition. Review each environment's
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+typedef int (*rte_power_get_capabilities_t)(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
+extern rte_power_get_capabilities_t rte_power_get_capabilities;
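A usage sketch, assuming the power library has already been initialised for the lcore (e.g. via rte_power_init()):

    /* Sketch: query the power capabilities of lcore 1 and test the turbo bit. */
    struct rte_power_core_capabilities caps;
    if (rte_power_get_capabilities(1, &caps) == 0 && caps.turbo)
        printf("lcore 1 supports turbo boost\n");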
#ifdef __cplusplus
}
diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map
index 96dc42ec..dd587dfb 100644
--- a/lib/librte_power/rte_power_version.map
+++ b/lib/librte_power/rte_power_version.map
@@ -25,4 +25,11 @@ DPDK_17.11 {
rte_power_freq_enable_turbo;
rte_power_turbo_status;
-} DPDK_2.0; \ No newline at end of file
+} DPDK_2.0;
+
+DPDK_18.08 {
+ global:
+
+ rte_power_get_capabilities;
+
+} DPDK_17.11;
diff --git a/lib/librte_rawdev/Makefile b/lib/librte_rawdev/Makefile
index b9105b06..addb288d 100644
--- a/lib/librte_rawdev/Makefile
+++ b/lib/librte_rawdev/Makefile
@@ -10,7 +10,6 @@ LIB = librte_rawdev.a
LIBABIVER := 1
# build flags
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_eal
diff --git a/lib/librte_rawdev/meson.build b/lib/librte_rawdev/meson.build
new file mode 100644
index 00000000..a20fbdc0
--- /dev/null
+++ b/lib/librte_rawdev/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+sources = files('rte_rawdev.c')
+headers = files('rte_rawdev.h', 'rte_rawdev_pmd.h')
diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c
index d314ef96..62b6b97e 100644
--- a/lib/librte_rawdev/rte_rawdev.c
+++ b/lib/librte_rawdev/rte_rawdev.c
@@ -46,13 +46,13 @@ static struct rte_rawdev_global rawdev_globals = {
struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals;
/* Raw device, northbound API implementation */
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void)
{
return rte_rawdev_globals->nb_devs;
}
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name)
{
uint16_t i;
@@ -69,7 +69,7 @@ rte_rawdev_get_dev_id(const char *name)
return -ENODEV;
}
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -80,7 +80,7 @@ rte_rawdev_socket_id(uint16_t dev_id)
return dev->socket_id;
}
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
{
struct rte_rawdev *rawdev;
@@ -88,9 +88,6 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
RTE_FUNC_PTR_OR_ERR_RET(dev_info, -EINVAL);
- if (dev_info == NULL)
- return -EINVAL;
-
rawdev = &rte_rawdevs[dev_id];
RTE_FUNC_PTR_OR_ERR_RET(*rawdev->dev_ops->dev_info_get, -ENOTSUP);
@@ -105,7 +102,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
return 0;
}
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
{
struct rte_rawdev *dev;
@@ -134,7 +131,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
return diag;
}
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -149,7 +146,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
return 0;
}
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -163,7 +160,7 @@ rte_rawdev_queue_setup(uint16_t dev_id,
return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf);
}
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
{
struct rte_rawdev *dev;
@@ -175,7 +172,19 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
return (*dev->dev_ops->queue_release)(dev, queue_id);
}
-int __rte_experimental
+uint16_t
+rte_rawdev_queue_count(uint16_t dev_id)
+{
+ struct rte_rawdev *dev;
+
+ RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_rawdevs[dev_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_count, -ENOTSUP);
+ return (*dev->dev_ops->queue_count)(dev);
+}
+
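A possible application-side use of the new call, sketched under the assumption that *my_queue_conf* is whatever driver-specific configuration object raw device 0 expects:

    /* Sketch: walk all configured queues of raw device 0. */
    uint16_t q;
    uint16_t n_queues = rte_rawdev_queue_count(0);
    for (q = 0; q < n_queues; q++)
        rte_rawdev_queue_conf_get(0, q, &my_queue_conf);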
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value)
@@ -189,7 +198,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_get)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value)
@@ -203,7 +212,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_set)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -218,7 +227,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
return (*dev->dev_ops->enqueue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -233,7 +242,7 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id,
return (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f)
{
struct rte_rawdev *dev;
@@ -254,7 +263,7 @@ xstats_get_count(uint16_t dev_id)
return (*dev->dev_ops->xstats_get_names)(dev, NULL, 0);
}
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size)
@@ -277,7 +286,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
}
/* retrieve rawdev extended statistics */
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -290,7 +299,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get)(dev, ids, values, n);
}
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id)
@@ -309,7 +318,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get_by_name)(dev, name, id);
}
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[], uint32_t nb_ids)
{
@@ -320,7 +329,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
return (*dev->dev_ops->xstats_reset)(dev, ids, nb_ids);
}
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -330,7 +339,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
return (*dev->dev_ops->firmware_status_get)(dev, status_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -340,7 +349,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
return (*dev->dev_ops->firmware_version_get)(dev, version_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -353,7 +362,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
return (*dev->dev_ops->firmware_load)(dev, firmware_image);
}
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -363,7 +372,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id)
return (*dev->dev_ops->firmware_unload)(dev);
}
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -373,7 +382,7 @@ rte_rawdev_selftest(uint16_t dev_id)
return (*dev->dev_ops->dev_selftest)();
}
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -400,7 +409,7 @@ rte_rawdev_start(uint16_t dev_id)
return 0;
}
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -422,7 +431,7 @@ rte_rawdev_stop(uint16_t dev_id)
dev->started = 0;
}
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -441,7 +450,7 @@ rte_rawdev_close(uint16_t dev_id)
return (*dev->dev_ops->dev_close)(dev);
}
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -468,7 +477,7 @@ rte_rawdev_find_free_device_index(void)
return RTE_RAWDEV_MAX_DEVS;
}
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
{
struct rte_rawdev *rawdev;
@@ -509,7 +518,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
return rawdev;
}
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
{
int ret;
@@ -535,10 +544,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
return 0;
}
-RTE_INIT(librawdev_init_log);
-
-static void
-librawdev_init_log(void)
+RTE_INIT(librawdev_init_log)
{
librawdev_logtype = rte_log_register("lib.rawdev");
if (librawdev_logtype >= 0)
diff --git a/lib/librte_rawdev/rte_rawdev.h b/lib/librte_rawdev/rte_rawdev.h
index 2e14919b..684bfdb8 100644
--- a/lib/librte_rawdev/rte_rawdev.h
+++ b/lib/librte_rawdev/rte_rawdev.h
@@ -35,7 +35,7 @@ typedef void *rte_rawdev_obj_t;
* @return
* The total number of usable raw devices.
*/
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void);
/**
@@ -48,7 +48,7 @@ rte_rawdev_count(void);
* Returns raw device identifier on success.
* - <0: Failure to find named raw device.
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name);
/**
@@ -61,7 +61,7 @@ rte_rawdev_get_dev_id(const char *name);
* a default of zero if the socket could not be determined.
* -(-EINVAL) dev_id value is out of range.
*/
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id);
/**
@@ -84,7 +84,7 @@ struct rte_rawdev_info;
* - <0: Error code returned by the driver info get function.
*
*/
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
/**
@@ -111,7 +111,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
* - 0: Success, device configured.
* - <0: Error code returned by the driver configuration function.
*/
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
@@ -137,7 +137,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
* @see rte_raw_queue_setup()
*
*/
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -160,7 +160,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
* - 0: Success, raw queue correctly set up.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -180,8 +180,9 @@ rte_rawdev_queue_setup(uint16_t dev_id,
* - 0: Success, raw queue released.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
+
/**
* Get the number of raw queues on a specific raw device
*
@@ -190,7 +191,7 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
* @return
* - The number of configured raw queues
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_queue_count(uint16_t dev_id);
/**
@@ -208,7 +209,7 @@ rte_rawdev_queue_count(uint16_t dev_id);
* - 0: Success, device started.
* < 0: Failure
*/
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id);
/**
@@ -218,7 +219,7 @@ rte_rawdev_start(uint16_t dev_id);
* @param dev_id
* Raw device identifier.
*/
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id);
/**
@@ -232,7 +233,7 @@ rte_rawdev_stop(uint16_t dev_id);
* - <0 on failure to close device
* - (-EAGAIN) if device is busy
*/
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id);
/**
@@ -246,7 +247,7 @@ rte_rawdev_close(uint16_t dev_id);
 *   0 for successful reset,
* !0 for failure in resetting
*/
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id);
#define RTE_RAWDEV_NAME_MAX_LEN (64)
@@ -316,7 +317,7 @@ struct rte_rawdev_buf {
* - 0: on success
* - <0: on failure.
*/
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f);
/**
@@ -338,7 +339,7 @@ rte_rawdev_dump(uint16_t dev_id, FILE *f);
* 0 for success
* !0 Error; attr_value remains untouched in case of error.
*/
-int __rte_experimental
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value);
@@ -357,7 +358,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
* 0 for success
* !0 Error
*/
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value);
@@ -383,7 +384,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
* Whether partial enqueue is failure or success is defined between app
* and driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -414,7 +415,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
* Whether partial enqueue is failure or success is defined between app
* and driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -454,7 +455,7 @@ struct rte_rawdev_xstats_name {
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size);
@@ -478,7 +479,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -500,7 +501,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
* - positive value or zero: the stat value
* - negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
*/
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id);
@@ -520,7 +521,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
* - zero: successfully reset the statistics to zero
* - negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[],
uint32_t nb_ids);
@@ -539,7 +540,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
* 0 for success,
* !0 for failure, `status_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id,
rte_rawdev_obj_t status_info);
@@ -557,7 +558,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id,
* 0 for success,
* !0 for failure, `version_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id,
rte_rawdev_obj_t version_info);
@@ -574,7 +575,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id,
* 0 for successful load
* !0 for failure to load the provided image, or image incorrect.
*/
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
/**
@@ -586,7 +587,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
* 0 for successful Unload
* !0 for failure in unloading
*/
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id);
/**
@@ -599,7 +600,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id);
* - -ENOTSUP if the device doesn't support selftest
* - other values < 0 on failure.
*/
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id);
#ifdef __cplusplus
diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h
index 408adf0f..bb9bbc35 100644
--- a/lib/librte_rawdev/rte_rawdev_pmd.h
+++ b/lib/librte_rawdev/rte_rawdev_pmd.h
@@ -251,6 +251,24 @@ typedef int (*rawdev_queue_release_t)(struct rte_rawdev *dev,
uint16_t queue_id);
/**
+ * Get the number of queues configured on this device.
+ *
+ * This information could also be derived from the device configuration, but
+ * that assumes the configuration object managed by the driver carries it.
+ * This callback reports the queue count directly, which is useful when the
+ * caller needs to iterate over the queues.
+ *
+ * @param dev
+ *   Raw device pointer
+ * @return
+ *   Number of queues; a return value of 0 (no queues) is valid.
+ *
+ */
+typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev);
+
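A driver could satisfy this callback with something as small as the following sketch, where *my_rawdev_priv* and its *nb_queues* field are hypothetical driver-private state; the function would then be plugged into struct rte_rawdev_ops through the new queue_count member:

    /* Sketch: minimal driver-side implementation of the queue_count callback. */
    static uint16_t
    my_rawdev_queue_count(struct rte_rawdev *dev)
    {
        struct my_rawdev_priv *priv = dev->dev_private;
        return priv->nb_queues;
    }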
+/**
* Enqueue an array of raw buffers to the device.
*
* Buffer being used is opaque - it can be obtained from mempool or from
@@ -506,6 +524,8 @@ struct rte_rawdev_ops {
rawdev_queue_setup_t queue_setup;
/**< Release an raw queue. */
rawdev_queue_release_t queue_release;
+ /**< Get the number of queues attached to the device */
+ rawdev_queue_count_t queue_count;
/**< Enqueue an array of raw buffers to device. */
rawdev_enqueue_bufs_t enqueue_bufs;
@@ -556,7 +576,7 @@ struct rte_rawdev_ops {
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
int socket_id);
@@ -568,7 +588,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
/**
@@ -585,7 +605,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
* - Raw device pointer if device is successfully created.
* - NULL if device cannot be created.
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
int socket_id);
@@ -597,7 +617,7 @@ rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_uninit(const char *name);
#ifdef __cplusplus
diff --git a/lib/librte_rawdev/rte_rawdev_version.map b/lib/librte_rawdev/rte_rawdev_version.map
index af4465e2..b61dbff1 100644
--- a/lib/librte_rawdev/rte_rawdev_version.map
+++ b/lib/librte_rawdev/rte_rawdev_version.map
@@ -1,4 +1,4 @@
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_rawdev_close;
@@ -16,6 +16,7 @@ EXPERIMENTAL {
rte_rawdev_pmd_allocate;
rte_rawdev_pmd_release;
rte_rawdev_queue_conf_get;
+ rte_rawdev_queue_count;
rte_rawdev_queue_setup;
rte_rawdev_queue_release;
rte_rawdev_reset;
diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile
index bde8907d..21a36770 100644
--- a/lib/librte_ring/Makefile
+++ b/lib/librte_ring/Makefile
@@ -11,7 +11,7 @@ LDLIBS += -lrte_eal
EXPORT_MAP := rte_ring_version.map
-LIBABIVER := 1
+LIBABIVER := 2
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 253cdc96..7a731d07 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -26,8 +26,9 @@
* - Bulk dequeue.
* - Bulk enqueue.
*
- * Note: the ring implementation is not preemptable. A lcore must not
- * be interrupted by another task that uses the same ring.
+ * Note: the ring implementation is not preemptible. Refer to Programmer's
+ * guide/Environment Abstraction Layer/Multiple pthread/Known Issues/rte_ring
+ * for more information.
*
*/
@@ -62,14 +63,6 @@ enum rte_ring_queue_behavior {
struct rte_memzone; /* forward declaration, so as not to require memzone.h */
-#if RTE_CACHE_LINE_SIZE < 128
-#define PROD_ALIGN (RTE_CACHE_LINE_SIZE * 2)
-#define CONS_ALIGN (RTE_CACHE_LINE_SIZE * 2)
-#else
-#define PROD_ALIGN RTE_CACHE_LINE_SIZE
-#define CONS_ALIGN RTE_CACHE_LINE_SIZE
-#endif
-
/* structure to hold a pair of head/tail values and other metadata */
struct rte_ring_headtail {
volatile uint32_t head; /**< Prod/consumer head. */
@@ -101,11 +94,15 @@ struct rte_ring {
uint32_t mask; /**< Mask (size-1) of ring. */
uint32_t capacity; /**< Usable size of ring */
+ char pad0 __rte_cache_aligned; /**< empty cache line */
+
/** Ring producer status. */
- struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN);
+ struct rte_ring_headtail prod __rte_cache_aligned;
+ char pad1 __rte_cache_aligned; /**< empty cache line */
/** Ring consumer status. */
- struct rte_ring_headtail cons __rte_aligned(CONS_ALIGN);
+ struct rte_ring_headtail cons __rte_cache_aligned;
+ char pad2 __rte_cache_aligned; /**< empty cache line */
};
#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
@@ -339,7 +336,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
static __rte_always_inline unsigned int
__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table,
unsigned int n, enum rte_ring_queue_behavior behavior,
- int is_sp, unsigned int *free_space)
+ unsigned int is_sp, unsigned int *free_space)
{
uint32_t prod_head, prod_next;
uint32_t free_entries;
@@ -381,12 +378,12 @@ end:
static __rte_always_inline unsigned int
__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
unsigned int n, enum rte_ring_queue_behavior behavior,
- int is_sc, unsigned int *available)
+ unsigned int is_sc, unsigned int *available)
{
uint32_t cons_head, cons_next;
uint32_t entries;
- n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+ n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
&cons_head, &cons_next, &entries);
if (n == 0)
goto end;
diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h
index 08825ea5..94df3c4a 100644
--- a/lib/librte_ring/rte_ring_c11_mem.h
+++ b/lib/librte_ring/rte_ring_c11_mem.h
@@ -51,7 +51,7 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
static __rte_always_inline unsigned int
-__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
+__rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
unsigned int n, enum rte_ring_queue_behavior behavior,
uint32_t *old_head, uint32_t *new_head,
uint32_t *free_entries)
@@ -66,14 +66,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
*old_head = __atomic_load_n(&r->prod.head,
__ATOMIC_ACQUIRE);
- const uint32_t cons_tail = r->cons.tail;
+
/*
* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* *old_head > cons_tail). So 'free_entries' is always between 0
* and capacity (which is < size).
*/
- *free_entries = (capacity + cons_tail - *old_head);
+ *free_entries = (capacity + r->cons.tail - *old_head);
/* check that we have enough room in ring */
if (unlikely(n > *free_entries))
@@ -133,13 +133,13 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
n = max;
*old_head = __atomic_load_n(&r->cons.head,
__ATOMIC_ACQUIRE);
- const uint32_t prod_tail = r->prod.tail;
+
/* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* cons_head > prod_tail). So 'entries' is always between 0
* and size(ring)-1.
*/
- *entries = (prod_tail - *old_head);
+ *entries = (r->prod.tail - *old_head);
/* Set the actual entries for dequeue */
if (n > *entries)
diff --git a/lib/librte_ring/rte_ring_generic.h b/lib/librte_ring/rte_ring_generic.h
index 5b110425..ea7dbe5b 100644
--- a/lib/librte_ring/rte_ring_generic.h
+++ b/lib/librte_ring/rte_ring_generic.h
@@ -53,7 +53,7 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
static __rte_always_inline unsigned int
-__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
+__rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
unsigned int n, enum rte_ring_queue_behavior behavior,
uint32_t *old_head, uint32_t *new_head,
uint32_t *free_entries)
@@ -73,14 +73,13 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
*/
rte_smp_rmb();
- const uint32_t cons_tail = r->cons.tail;
/*
* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* *old_head > cons_tail). So 'free_entries' is always between 0
* and capacity (which is < size).
*/
- *free_entries = (capacity + cons_tail - *old_head);
+ *free_entries = (capacity + r->cons.tail - *old_head);
/* check that we have enough room in ring */
if (unlikely(n > *free_entries))
@@ -124,7 +123,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
static __rte_always_inline unsigned int
-__rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
+__rte_ring_move_cons_head(struct rte_ring *r, unsigned int is_sc,
unsigned int n, enum rte_ring_queue_behavior behavior,
uint32_t *old_head, uint32_t *new_head,
uint32_t *entries)
@@ -144,13 +143,12 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
*/
rte_smp_rmb();
- const uint32_t prod_tail = r->prod.tail;
/* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* cons_head > prod_tail). So 'entries' is always between 0
* and size(ring)-1.
*/
- *entries = (prod_tail - *old_head);
+ *entries = (r->prod.tail - *old_head);
/* Set the actual entries for dequeue */
if (n > *entries)
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 634486c8..9269e5c7 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -276,9 +276,54 @@ rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
}
static int
+pipe_profile_check(struct rte_sched_pipe_params *params,
+ uint32_t rate)
+{
+ uint32_t i;
+
+ /* Pipe parameters */
+ if (params == NULL)
+ return -10;
+
+ /* TB rate: non-zero, not greater than port rate */
+ if (params->tb_rate == 0 ||
+ params->tb_rate > rate)
+ return -11;
+
+ /* TB size: non-zero */
+ if (params->tb_size == 0)
+ return -12;
+
+ /* TC rate: non-zero, less than pipe rate */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ if (params->tc_rate[i] == 0 ||
+ params->tc_rate[i] > params->tb_rate)
+ return -13;
+ }
+
+ /* TC period: non-zero */
+ if (params->tc_period == 0)
+ return -14;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ /* TC3 oversubscription weight: non-zero */
+ if (params->tc_ov_weight == 0)
+ return -15;
+#endif
+
+ /* Queue WRR weights: non-zero */
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
+ if (params->wrr_weights[i] == 0)
+ return -16;
+ }
+
+ return 0;
+}
+
+static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
- uint32_t i, j;
+ uint32_t i;
if (params == NULL)
return -1;
@@ -324,36 +369,11 @@ rte_sched_port_check_params(struct rte_sched_port_params *params)
for (i = 0; i < params->n_pipe_profiles; i++) {
struct rte_sched_pipe_params *p = params->pipe_profiles + i;
+ int status;
- /* TB rate: non-zero, not greater than port rate */
- if (p->tb_rate == 0 || p->tb_rate > params->rate)
- return -10;
-
- /* TB size: non-zero */
- if (p->tb_size == 0)
- return -11;
-
- /* TC rate: non-zero, less than pipe rate */
- for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
- if (p->tc_rate[j] == 0 || p->tc_rate[j] > p->tb_rate)
- return -12;
- }
-
- /* TC period: non-zero */
- if (p->tc_period == 0)
- return -13;
-
-#ifdef RTE_SCHED_SUBPORT_TC_OV
- /* TC3 oversubscription weight: non-zero */
- if (p->tc_ov_weight == 0)
- return -14;
-#endif
-
- /* Queue WRR weights: non-zero */
- for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) {
- if (p->wrr_weights[j] == 0)
- return -15;
- }
+ status = pipe_profile_check(p, params->rate);
+ if (status != 0)
+ return status;
}
return 0;
@@ -514,69 +534,80 @@ rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
}
static void
-rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params)
+rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
+ struct rte_sched_pipe_profile *dst,
+ uint32_t rate)
{
- uint32_t i, j;
+ uint32_t i;
- for (i = 0; i < port->n_pipe_profiles; i++) {
- struct rte_sched_pipe_params *src = params->pipe_profiles + i;
- struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
+ /* Token Bucket */
+ if (src->tb_rate == rate) {
+ dst->tb_credits_per_period = 1;
+ dst->tb_period = 1;
+ } else {
+ double tb_rate = (double) src->tb_rate
+ / (double) rate;
+ double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
- /* Token Bucket */
- if (src->tb_rate == params->rate) {
- dst->tb_credits_per_period = 1;
- dst->tb_period = 1;
- } else {
- double tb_rate = (double) src->tb_rate
- / (double) params->rate;
- double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
-
- rte_approx(tb_rate, d,
- &dst->tb_credits_per_period, &dst->tb_period);
- }
- dst->tb_size = src->tb_size;
+ rte_approx(tb_rate, d,
+ &dst->tb_credits_per_period, &dst->tb_period);
+ }
+
+ dst->tb_size = src->tb_size;
- /* Traffic Classes */
- dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
- params->rate);
+ /* Traffic Classes */
+ dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
+ rate);
- for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++)
- dst->tc_credits_per_period[j]
- = rte_sched_time_ms_to_bytes(src->tc_period,
- src->tc_rate[j]);
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+ dst->tc_credits_per_period[i]
+ = rte_sched_time_ms_to_bytes(src->tc_period,
+ src->tc_rate[i]);
#ifdef RTE_SCHED_SUBPORT_TC_OV
- dst->tc_ov_weight = src->tc_ov_weight;
+ dst->tc_ov_weight = src->tc_ov_weight;
#endif
- /* WRR */
- for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
- uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
- uint32_t lcd, lcd1, lcd2;
- uint32_t qindex;
-
- qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
-
- wrr_cost[0] = src->wrr_weights[qindex];
- wrr_cost[1] = src->wrr_weights[qindex + 1];
- wrr_cost[2] = src->wrr_weights[qindex + 2];
- wrr_cost[3] = src->wrr_weights[qindex + 3];
-
- lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
- lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
- lcd = rte_get_lcd(lcd1, lcd2);
-
- wrr_cost[0] = lcd / wrr_cost[0];
- wrr_cost[1] = lcd / wrr_cost[1];
- wrr_cost[2] = lcd / wrr_cost[2];
- wrr_cost[3] = lcd / wrr_cost[3];
-
- dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
- dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
- dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
- dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
- }
+ /* WRR */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint32_t lcd, lcd1, lcd2;
+ uint32_t qindex;
+
+ qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+
+ wrr_cost[0] = src->wrr_weights[qindex];
+ wrr_cost[1] = src->wrr_weights[qindex + 1];
+ wrr_cost[2] = src->wrr_weights[qindex + 2];
+ wrr_cost[3] = src->wrr_weights[qindex + 3];
+
+ lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
+ lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
+ lcd = rte_get_lcd(lcd1, lcd2);
+
+ wrr_cost[0] = lcd / wrr_cost[0];
+ wrr_cost[1] = lcd / wrr_cost[1];
+ wrr_cost[2] = lcd / wrr_cost[2];
+ wrr_cost[3] = lcd / wrr_cost[3];
+
+ dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
+ dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
+ dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
+ dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
+ }
+}
+
+static void
+rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port,
+ struct rte_sched_port_params *params)
+{
+ uint32_t i;
+
+ for (i = 0; i < port->n_pipe_profiles; i++) {
+ struct rte_sched_pipe_params *src = params->pipe_profiles + i;
+ struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
+ rte_sched_pipe_profile_convert(src, dst, params->rate);
rte_sched_port_log_pipe_profile(port, i);
}
@@ -932,6 +963,48 @@ rte_sched_pipe_config(struct rte_sched_port *port,
return 0;
}
+int __rte_experimental
+rte_sched_port_pipe_profile_add(struct rte_sched_port *port,
+ struct rte_sched_pipe_params *params,
+ uint32_t *pipe_profile_id)
+{
+ struct rte_sched_pipe_profile *pp;
+ uint32_t i;
+ int status;
+
+ /* Port */
+ if (port == NULL)
+ return -1;
+
+	/* Number of pipe profiles must not exceed the max limit */
+ if (port->n_pipe_profiles >= RTE_SCHED_PIPE_PROFILES_PER_PORT)
+ return -2;
+
+ /* Pipe params */
+ status = pipe_profile_check(params, port->rate);
+ if (status != 0)
+ return status;
+
+ pp = &port->pipe_profiles[port->n_pipe_profiles];
+ rte_sched_pipe_profile_convert(params, pp, port->rate);
+
+	/* Pipe profile must not already exist */
+ for (i = 0; i < port->n_pipe_profiles; i++)
+ if (memcmp(port->pipe_profiles + i, pp, sizeof(*pp)) == 0)
+ return -3;
+
+ /* Pipe profile commit */
+ *pipe_profile_id = port->n_pipe_profiles;
+ port->n_pipe_profiles++;
+
+ if (port->pipe_tc3_rate_max < params->tc_rate[3])
+ port->pipe_tc3_rate_max = params->tc_rate[3];
+
+ rte_sched_port_log_pipe_profile(port, *pipe_profile_id);
+
+ return 0;
+}
+
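A runtime sketch of adding a profile to a configured port; all numeric values are illustrative and *port* is an already-created scheduler port:

    /* Sketch: add a pipe profile at runtime and retrieve its ID. */
    struct rte_sched_pipe_params pp = {
        .tb_rate = 305175,
        .tb_size = 1000000,
        .tc_rate = {305175, 305175, 305175, 305175},
        .tc_period = 40,
    #ifdef RTE_SCHED_SUBPORT_TC_OV
        .tc_ov_weight = 1,
    #endif
        .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    };
    uint32_t profile_id;
    int status = rte_sched_port_pipe_profile_add(port, &pp, &profile_id);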
void
rte_sched_port_pkt_write(struct rte_mbuf *pkt,
uint32_t subport, uint32_t pipe, uint32_t traffic_class,
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
index 5d2a688d..84fa896d 100644
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@@ -57,6 +57,7 @@ extern "C" {
*/
#include <sys/types.h>
+#include <rte_compat.h>
#include <rte_mbuf.h>
#include <rte_meter.h>
@@ -234,6 +235,26 @@ void
rte_sched_port_free(struct rte_sched_port *port);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Hierarchical scheduler pipe profile add
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param params
+ * Pipe profile parameters
+ * @param pipe_profile_id
+ *   Set to a valid profile ID when the profile is added successfully.
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int __rte_experimental
+rte_sched_port_pipe_profile_add(struct rte_sched_port *port,
+ struct rte_sched_pipe_params *params,
+ uint32_t *pipe_profile_id);
+
+/**
* Hierarchical scheduler subport configuration
*
* @param port
diff --git a/lib/librte_sched/rte_sched_version.map b/lib/librte_sched/rte_sched_version.map
index 3aa159ab..72958879 100644
--- a/lib/librte_sched/rte_sched_version.map
+++ b/lib/librte_sched/rte_sched_version.map
@@ -29,3 +29,9 @@ DPDK_2.1 {
rte_sched_port_pkt_read_color;
} DPDK_2.0;
+
+EXPERIMENTAL {
+ global:
+
+ rte_sched_port_pipe_profile_add;
+};
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
index 1e559c99..1954960a 100644
--- a/lib/librte_security/rte_security.c
+++ b/lib/librte_security/rte_security.c
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#include <rte_malloc.h>
@@ -91,7 +63,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
struct rte_security_session *sess)
{
int ret;
- struct rte_mempool *mp = rte_mempool_from_obj(sess);
RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
@@ -100,7 +71,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
ret = instance->ops->session_destroy(instance->device, sess);
if (!ret)
- rte_mempool_put(mp, (void *)sess);
+ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess);
return ret;
}
diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h
index c75c1218..b0d1b97e 100644
--- a/lib/librte_security/rte_security.h
+++ b/lib/librte_security/rte_security.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_H_
@@ -222,6 +194,8 @@ struct rte_security_ipsec_xform {
/**< IPsec SA Mode - transport/tunnel */
struct rte_security_ipsec_tunnel_param tunnel;
/**< Tunnel parameters, NULL for transport mode */
+ uint64_t esn_soft_limit;
+	/**< ESN for which the overflow event needs to be raised */
};
/**
@@ -362,15 +336,17 @@ rte_security_set_pkt_metadata(struct rte_security_ctx *instance,
struct rte_mbuf *mb, void *params);
/**
- * Get userdata associated with the security session which processed the
- * packet. This userdata would be registered while creating the session, and
- * application can use this to identify the SA etc. Device-specific metadata
- * in the mbuf would be used for this.
+ * Get userdata associated with the security session. The device-specific
+ * metadata provided is used to uniquely identify the security session being
+ * referred to. This userdata is registered while creating the session, and
+ * the application can use it to identify the SA etc.
*
- * This is valid only for inline processed ingress packets.
+ * Device-specific metadata is set in the mbuf for inline processed inbound
+ * packets. In addition, the same metadata is set for IPsec events
+ * reported by the rte_eth_event framework.
*
* @param instance security instance
- * @param md device-specific metadata set in mbuf
+ * @param md device-specific metadata
*
* @return
* - On success, userdata
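For example (a sketch only; how the metadata is carried in the mbuf is driver specific, *sec_ctx* is an existing security context, and *handle_inbound_sa()* is a hypothetical application helper):

    /* Sketch: map device-specific metadata back to the userdata (e.g. an SA
     * pointer) that was registered when the security session was created. */
    uint64_t md = 0; /* assumed to be extracted from the received mbuf */
    void *sa = rte_security_get_userdata(sec_ctx, md);
    if (sa != NULL)
        handle_inbound_sa(sa);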
diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h
index 46239049..42f42ffe 100644
--- a/lib/librte_security/rte_security_driver.h
+++ b/lib/librte_security/rte_security_driver.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- * Copyright 2017 NXP.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_DRIVER_H_
@@ -134,9 +106,9 @@ typedef int (*security_set_pkt_metadata_t)(void *device,
void *params);
/**
- * Get application specific userdata associated with the security session which
- * processed the packet. This would be retrieved using the metadata obtained
- * from packet.
+ * Get application specific userdata associated with the security session.
+ * Device specific metadata provided would be used to uniquely identify
+ * the security session being referred to.
*
* @param device Crypto/eth device pointer
* @param md Metadata
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
index c4a9acb0..276d476a 100644
--- a/lib/librte_table/Makefile
+++ b/lib/librte_table/Makefile
@@ -45,6 +45,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h
endif
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_cuckoo.h
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h
ifeq ($(CONFIG_RTE_ARCH_X86),y)
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h
diff --git a/lib/librte_table/meson.build b/lib/librte_table/meson.build
index 13e797b4..8b2f8413 100644
--- a/lib/librte_table/meson.build
+++ b/lib/librte_table/meson.build
@@ -1,24 +1,29 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 2
-sources = files('rte_table_lpm.c', 'rte_table_lpm_ipv6.c',
- 'rte_table_hash_cuckoo.c', 'rte_table_hash_key8.c',
- 'rte_table_hash_key16.c', 'rte_table_hash_key32.c',
- 'rte_table_hash_ext.c', 'rte_table_hash_lru.c',
- 'rte_table_array.c', 'rte_table_stub.c')
-headers = files('rte_table.h', 'rte_table_lpm.h',
- 'rte_table_lpm_ipv6.h', 'rte_table_hash.h',
- 'rte_lru.h', 'rte_table_array.h',
+version = 3
+sources = files('rte_table_acl.c',
+ 'rte_table_lpm.c',
+ 'rte_table_lpm_ipv6.c',
+ 'rte_table_hash_cuckoo.c',
+ 'rte_table_hash_key8.c',
+ 'rte_table_hash_key16.c',
+ 'rte_table_hash_key32.c',
+ 'rte_table_hash_ext.c',
+ 'rte_table_hash_lru.c',
+ 'rte_table_array.c',
+ 'rte_table_stub.c')
+headers = files('rte_table.h',
+ 'rte_table_acl.h',
+ 'rte_table_lpm.h',
+ 'rte_table_lpm_ipv6.h',
+ 'rte_table_hash.h',
+ 'rte_table_hash_cuckoo.h',
+ 'rte_lru.h',
+ 'rte_table_array.h',
'rte_table_stub.h')
-deps += ['mbuf', 'port', 'lpm', 'hash']
+deps += ['mbuf', 'port', 'lpm', 'hash', 'acl']
if arch_subdir == 'x86'
headers += files('rte_lru_x86.h')
endif
-
-if dpdk_conf.has('RTE_LIBRTE_ACL')
- sources += files('rte_table_acl.c')
- headers += files('rte_table_acl.h')
- deps += ['acl']
-endif
diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c
index 73d3910e..14d54019 100644
--- a/lib/librte_table/rte_table_acl.c
+++ b/lib/librte_table/rte_table_acl.c
@@ -472,12 +472,6 @@ rte_table_acl_entry_add_bulk(
return -EINVAL;
}
- if (entries_ptr[i] == NULL) {
- RTE_LOG(ERR, TABLE, "%s: entries_ptr[%" PRIu32 "] parameter is NULL\n",
- __func__, i);
- return -EINVAL;
- }
-
rule = keys[i];
if (rule->priority > RTE_ACL_MAX_PRIORITY) {
RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__);
diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
index 7aad84fa..6f55bd57 100644
--- a/lib/librte_table/rte_table_hash.h
+++ b/lib/librte_table/rte_table_hash.h
@@ -99,9 +99,6 @@ extern struct rte_table_ops rte_table_hash_key8_lru_ops;
extern struct rte_table_ops rte_table_hash_key16_lru_ops;
extern struct rte_table_ops rte_table_hash_key32_lru_ops;
-/** Cuckoo hash table operations */
-extern struct rte_table_ops rte_table_hash_cuckoo_ops;
-
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c
index dcb4fe97..f0243033 100644
--- a/lib/librte_table/rte_table_hash_cuckoo.c
+++ b/lib/librte_table/rte_table_hash_cuckoo.c
@@ -10,8 +10,7 @@
#include <rte_malloc.h>
#include <rte_log.h>
-#include <rte_hash.h>
-#include "rte_table_hash.h"
+#include "rte_table_hash_cuckoo.h"
#ifdef RTE_TABLE_STATS_COLLECT
@@ -35,7 +34,7 @@ struct rte_table_hash {
uint32_t key_size;
uint32_t entry_size;
uint32_t n_keys;
- rte_table_hash_op_hash f_hash;
+ rte_hash_function f_hash;
uint32_t seed;
uint32_t key_offset;
@@ -47,7 +46,7 @@ struct rte_table_hash {
};
static int
-check_params_create_hash_cuckoo(struct rte_table_hash_params *params)
+check_params_create_hash_cuckoo(struct rte_table_hash_cuckoo_params *params)
{
if (params == NULL) {
RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n");
@@ -82,7 +81,7 @@ rte_table_hash_cuckoo_create(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_table_hash_params *p = params;
+ struct rte_table_hash_cuckoo_params *p = params;
struct rte_hash *h_table;
struct rte_table_hash *t;
uint32_t total_size;
@@ -107,7 +106,7 @@ rte_table_hash_cuckoo_create(void *params,
struct rte_hash_parameters hash_cuckoo_params = {
.entries = p->n_keys,
.key_len = p->key_size,
- .hash_func = (rte_hash_function)(p->f_hash),
+ .hash_func = p->f_hash,
.hash_func_init_val = p->seed,
.socket_id = socket_id,
.name = p->name
diff --git a/lib/librte_table/rte_table_hash_cuckoo.h b/lib/librte_table/rte_table_hash_cuckoo.h
new file mode 100644
index 00000000..d9d43121
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_cuckoo.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
+#define __INCLUDE_RTE_TABLE_HASH_CUCKOO_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table Hash Cuckoo
+ *
+ */
+#include <stdint.h>
+
+#include <rte_hash.h>
+
+#include "rte_table.h"
+
+/** Hash table parameters */
+struct rte_table_hash_cuckoo_params {
+ /** Name */
+ const char *name;
+
+ /** Key size (number of bytes) */
+ uint32_t key_size;
+
+ /** Byte offset within packet meta-data where the key is located */
+ uint32_t key_offset;
+
+ /** Key mask */
+ uint8_t *key_mask;
+
+ /** Number of keys */
+ uint32_t n_keys;
+
+ /** Number of buckets */
+ uint32_t n_buckets;
+
+ /** Hash function */
+ rte_hash_function f_hash;
+
+ /** Seed value for the hash function */
+ uint32_t seed;
+};
+
+/** Cuckoo hash table operations */
+extern struct rte_table_ops rte_table_hash_cuckoo_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
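
With the parameters and ops now exposed in a public header, an application can create a cuckoo table directly. A minimal sketch, assuming rte_jhash as the hash function; the helper name create_demo_cuckoo_table and all sizing values are illustrative, not mandated by the library:

#include <rte_jhash.h>
#include <rte_table_hash_cuckoo.h>

static void *
create_demo_cuckoo_table(int socket_id)
{
	struct rte_table_hash_cuckoo_params params = {
		.name = "demo_cuckoo",
		.key_size = 16,          /* illustrative key size */
		.key_offset = 0,
		.key_mask = NULL,        /* hash the full key */
		.n_keys = 1 << 16,
		.n_buckets = 1 << 14,
		.f_hash = rte_jhash,     /* rte_hash_function, no cast needed now */
		.seed = 0,
	};

	/* 8 bytes of user data stored per entry */
	return rte_table_hash_cuckoo_ops.f_create(&params, socket_id, 8);
}
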
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 4bbcd067..590488c7 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -403,7 +403,7 @@ rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
!(rte_lcore_is_enabled(tim_lcore) ||
- rte_lcore_has_role(tim_lcore, ROLE_SERVICE) == 0)))
+ rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
return -1;
if (type == PERIODICAL)
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6aba..de431fbb 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -18,13 +18,20 @@ LDLIBS += -lpthread
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
LDLIBS += -lnuma
endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \
+ -lrte_cryptodev -lrte_hash
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
- vhost_user.c virtio_net.c
+ vhost_user.c virtio_net.c vdpa.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
+
+# only compile vhost crypto when cryptodev is enabled
+ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h
+endif
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
index 181711c2..38347ab1 100644
--- a/lib/librte_vhost/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
@@ -16,6 +16,9 @@
#include "fd_man.h"
+
+#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1
+
#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
static int
@@ -171,6 +174,38 @@ fdset_del(struct fdset *pfdset, int fd)
return dat;
}
+/**
+ * Unregister the fd from the fdset.
+ *
+ * Returns -2 if the parameters are invalid, -1 if the fd is
+ * currently busy (its callback is executing), and 0 once the
+ * fd has been removed from the fdset.
+ */
+int
+fdset_try_del(struct fdset *pfdset, int fd)
+{
+ int i;
+
+ if (pfdset == NULL || fd == -1)
+ return -2;
+
+ pthread_mutex_lock(&pfdset->fd_mutex);
+ i = fdset_find_fd(pfdset, fd);
+ if (i != -1 && pfdset->fd[i].busy) {
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+ return -1;
+ }
+
+ if (i != -1) {
+ pfdset->fd[i].fd = -1;
+ pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+ pfdset->fd[i].dat = NULL;
+ }
+
+ pthread_mutex_unlock(&pfdset->fd_mutex);
+ return 0;
+}
/**
 * This function runs in an infinite blocking loop until there is no fd in
@@ -258,7 +293,7 @@ fdset_event_dispatch(void *arg)
* because the fd is closed in the cb,
 * the old fd val could be reused when another thread creates a
 * new listen fd, so we couldn't call
- * fd_set_del.
+ * fdset_del.
*/
if (remove1 || remove2) {
pfdentry->fd = -1;
@@ -272,3 +307,64 @@ fdset_event_dispatch(void *arg)
return NULL;
}
+
+static void
+fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
+ int *remove __rte_unused)
+{
+ char charbuf[16];
+ int r = read(readfd, charbuf, sizeof(charbuf));
+ /*
+	 * The read is only a wake-up drain; we don't care if read()
+	 * fails, so explicitly ignore its return value to keep the
+	 * compiler happy.
+ */
+ RTE_SET_USED(r);
+}
+
+void
+fdset_pipe_uninit(struct fdset *fdset)
+{
+ fdset_del(fdset, fdset->u.readfd);
+ close(fdset->u.readfd);
+ close(fdset->u.writefd);
+}
+
+int
+fdset_pipe_init(struct fdset *fdset)
+{
+ int ret;
+
+ if (pipe(fdset->u.pipefd) < 0) {
+ RTE_LOG(ERR, VHOST_FDMAN,
+ "failed to create pipe for vhost fdset\n");
+ return -1;
+ }
+
+ ret = fdset_add(fdset, fdset->u.readfd,
+ fdset_pipe_read_cb, NULL, NULL);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_FDMAN,
+ "failed to add pipe readfd %d into vhost server fdset\n",
+ fdset->u.readfd);
+
+ fdset_pipe_uninit(fdset);
+ return -1;
+ }
+
+ return 0;
+}
+
+void
+fdset_pipe_notify(struct fdset *fdset)
+{
+ int r = write(fdset->u.writefd, "1", 1);
+ /*
+	 * The write is only a notification; we don't care if write()
+	 * fails, so explicitly ignore its return value to keep the
+	 * compiler happy.
+ */
+ RTE_SET_USED(r);
+
+}
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
index 3a9276c3..3331bcd9 100644
--- a/lib/librte_vhost/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
@@ -25,6 +25,16 @@ struct fdset {
struct fdentry fd[MAX_FDS];
pthread_mutex_t fd_mutex;
int num; /* current fd number of this fdset */
+
+ union pipefds {
+ struct {
+ int pipefd[2];
+ };
+ struct {
+ int readfd;
+ int writefd;
+ };
+ } u;
};
@@ -34,7 +44,14 @@ int fdset_add(struct fdset *pfdset, int fd,
fd_cb rcb, fd_cb wcb, void *dat);
void *fdset_del(struct fdset *pfdset, int fd);
+int fdset_try_del(struct fdset *pfdset, int fd);
void *fdset_event_dispatch(void *arg);
+int fdset_pipe_init(struct fdset *fdset);
+
+void fdset_pipe_uninit(struct fdset *fdset);
+
+void fdset_pipe_notify(struct fdset *fdset);
+
#endif
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index c11ebcaa..c6354fef 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -303,6 +303,13 @@ out:
return vva;
}
+void
+vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+{
+ vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_pending_remove_all(vq);
+}
+
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
* The cache has already been initialized,
* just drop all cached and pending entries.
*/
- vhost_user_iotlb_cache_remove_all(vq);
- vhost_user_iotlb_pending_remove_all(vq);
+ vhost_user_iotlb_flush_all(vq);
}
#ifdef RTE_LIBRTE_VHOST_NUMA
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index e7083e37..60b9e4c5 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
uint8_t perm);
void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t size, uint8_t perm);
-
+void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index 9e8c0e76..bd62e0e3 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
+# Copyright(c) 2017-2018 Intel Corporation
if host_machine.system() != 'linux'
build = false
@@ -9,7 +9,8 @@ if has_libnuma == 1
endif
version = 4
allow_experimental_apis = true
-sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c',
- 'virtio_net.c')
-headers = files('rte_vhost.h')
-deps += ['ethdev']
+sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c',
+ 'vhost.c', 'vhost_user.c',
+ 'virtio_net.c', 'vhost_crypto.c')
+headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vhost_crypto.h')
+deps += ['ethdev', 'cryptodev', 'hash', 'pci']
diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
new file mode 100644
index 00000000..90465ca2
--- /dev/null
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_VDPA_H_
+#define _RTE_VDPA_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <rte_pci.h>
+#include "rte_vhost.h"
+
+#define MAX_VDPA_NAME_LEN 128
+
+enum vdpa_addr_type {
+ PCI_ADDR,
+ VDPA_ADDR_MAX
+};
+
+struct rte_vdpa_dev_addr {
+ enum vdpa_addr_type type;
+ union {
+ uint8_t __dummy[64];
+ struct rte_pci_addr pci_addr;
+ };
+};
+
+struct rte_vdpa_dev_ops {
+ /* Get capabilities of this device */
+ int (*get_queue_num)(int did, uint32_t *queue_num);
+ int (*get_features)(int did, uint64_t *features);
+ int (*get_protocol_features)(int did, uint64_t *protocol_features);
+
+ /* Driver configure/close the device */
+ int (*dev_conf)(int vid);
+ int (*dev_close)(int vid);
+
+ /* Enable/disable this vring */
+ int (*set_vring_state)(int vid, int vring, int state);
+
+ /* Set features when changed */
+ int (*set_features)(int vid);
+
+ /* Destination operations when migration done */
+ int (*migration_done)(int vid);
+
+ /* Get the vfio group fd */
+ int (*get_vfio_group_fd)(int vid);
+
+ /* Get the vfio device fd */
+ int (*get_vfio_device_fd)(int vid);
+
+ /* Get the notify area info of the queue */
+ int (*get_notify_area)(int vid, int qid,
+ uint64_t *offset, uint64_t *size);
+
+ /* Reserved for future extension */
+ void *reserved[5];
+};
+
+struct rte_vdpa_device {
+ struct rte_vdpa_dev_addr addr;
+ struct rte_vdpa_dev_ops *ops;
+} __rte_cache_aligned;
+
+/* Register a vdpa device, return did if successful, -1 on failure */
+int __rte_experimental
+rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
+ struct rte_vdpa_dev_ops *ops);
+
+/* Unregister a vdpa device, return -1 on failure */
+int __rte_experimental
+rte_vdpa_unregister_device(int did);
+
+/* Find did of a vdpa device, return -1 on failure */
+int __rte_experimental
+rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr);
+
+/* Find a vdpa device based on did */
+struct rte_vdpa_device * __rte_experimental
+rte_vdpa_get_device(int did);
+
+#endif /* _RTE_VDPA_H_ */
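
A vDPA driver registers itself through this API, typically from its PCI probe path. A minimal sketch under the assumption of a PCI-addressed device; demo_vdpa_ops, demo_get_queue_num and demo_register are hypothetical names, and a real driver would fill in the remaining callbacks:

#include <rte_pci.h>
#include <rte_vdpa.h>

static int
demo_get_queue_num(int did, uint32_t *queue_num)
{
	(void)did;
	*queue_num = 1;
	return 0;
}

static struct rte_vdpa_dev_ops demo_vdpa_ops = {
	.get_queue_num = demo_get_queue_num,
	/* remaining callbacks omitted for brevity */
};

static int
demo_register(const struct rte_pci_addr *pci_addr)
{
	struct rte_vdpa_dev_addr addr = {
		.type = PCI_ADDR,
		.pci_addr = *pci_addr,
	};

	/* returns the device id (did) on success, -1 on failure */
	return rte_vdpa_register_device(&addr, &demo_vdpa_ops);
}
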
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index d3320699..b02673d4 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -29,6 +29,48 @@ extern "C" {
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
+/** Protocol features. */
+#ifndef VHOST_USER_PROTOCOL_F_MQ
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_RARP
+#define VHOST_USER_PROTOCOL_F_RARP 2
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
+#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_NET_MTU
+#define VHOST_USER_PROTOCOL_F_NET_MTU 4
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
+#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
+#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
+#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+#endif
+
+/** Indicate whether protocol features negotiation is supported. */
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
/**
* Information relating to memory regions including offsets to
 * addresses in QEMU's memory file.
@@ -90,6 +132,11 @@ struct vhost_device_ops {
/**
* Convert guest physical address to host virtual address
*
+ * This function is deprecated because it is unsafe.
+ * New rte_vhost_va_from_guest_pa() should be used instead to ensure
+ * guest physical ranges are fully and contiguously mapped into
+ * process virtual address space.
+ *
* @param mem
* the guest memory regions
* @param gpa
@@ -97,6 +144,7 @@ struct vhost_device_ops {
* @return
* the host virtual address on success, 0 on failure
*/
+__rte_deprecated
static __rte_always_inline uint64_t
rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
{
@@ -115,6 +163,46 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
return 0;
}
+/**
+ * Convert guest physical address to host virtual address safely
+ *
+ * This variant of rte_vhost_gpa_to_vva() ensures that the whole
+ * requested length is mapped and contiguous in the process address
+ * space.
+ *
+ * @param mem
+ * the guest memory regions
+ * @param gpa
+ * the guest physical address for querying
+ * @param len
+ *  the size of the requested area to map, updated with the actual size mapped
+ * @return
+ * the host virtual address on success, 0 on failure
+ */
+static __rte_always_inline uint64_t
+rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
+ uint64_t gpa, uint64_t *len)
+{
+ struct rte_vhost_mem_region *r;
+ uint32_t i;
+
+ for (i = 0; i < mem->nregions; i++) {
+ r = &mem->regions[i];
+ if (gpa >= r->guest_phys_addr &&
+ gpa < r->guest_phys_addr + r->size) {
+
+ if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
+ *len = r->guest_phys_addr + r->size - gpa;
+
+ return gpa - r->guest_phys_addr +
+ r->host_user_addr;
+ }
+ }
+ *len = 0;
+
+ return 0;
+}
+
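
The length argument is both input and output, so callers are expected to loop whenever a guest physical range spans several memory regions. A hedged sketch of that calling pattern; copy_from_guest is a hypothetical helper, not part of the library:

#include <stdint.h>
#include <string.h>
#include <rte_vhost.h>

static int
copy_from_guest(struct rte_vhost_memory *mem, uint64_t gpa,
		void *dst, uint64_t len)
{
	while (len > 0) {
		uint64_t chunk = len;
		uint64_t vva = rte_vhost_va_from_guest_pa(mem, gpa, &chunk);

		if (vva == 0 || chunk == 0)
			return -1;	/* range not (fully) mapped */

		memcpy(dst, (void *)(uintptr_t)vva, chunk);
		dst = (char *)dst + chunk;
		gpa += chunk;
		len -= chunk;
	}
	return 0;
}
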
#define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
/**
@@ -170,6 +258,41 @@ int rte_vhost_driver_register(const char *path, uint64_t flags);
int rte_vhost_driver_unregister(const char *path);
/**
+ * Set the vdpa device id; this enforces a single connection per socket
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param did
+ * Device id
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_driver_attach_vdpa_device(const char *path, int did);
+
+/**
+ * Unset the vdpa device id
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_driver_detach_vdpa_device(const char *path);
+
+/**
+ * Get the vdpa device id attached to the vhost-user socket
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @return
+ * Device id, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_driver_get_vdpa_device_id(const char *path);
+
+/**
* Set the feature bits the vhost-user driver supports.
*
* @param path
@@ -225,6 +348,33 @@ int rte_vhost_driver_disable_features(const char *path, uint64_t features);
int rte_vhost_driver_get_features(const char *path, uint64_t *features);
/**
+ * Get the protocol feature bits before feature negotiation.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param protocol_features
+ * A pointer to store the queried protocol feature bits
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_driver_get_protocol_features(const char *path,
+ uint64_t *protocol_features);
+
+/**
+ * Get the queue number before feature negotiation.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param queue_num
+ *  A pointer to store the queried queue number
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
+
+/**
* Get the feature bits after negotiation
*
* @param vid
@@ -434,6 +584,68 @@ int rte_vhost_vring_call(int vid, uint16_t vring_idx);
*/
uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
+/**
+ * Get log base and log size of the vhost device
+ *
+ * @param vid
+ * vhost device ID
+ * @param log_base
+ * vhost log base
+ * @param log_size
+ * vhost log size
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
+
+/**
+ * Get last_avail/used_idx of the vhost virtqueue
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * vhost last_avail_idx to get
+ * @param last_used_idx
+ * vhost last_used_idx to get
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_get_vring_base(int vid, uint16_t queue_id,
+ uint16_t *last_avail_idx, uint16_t *last_used_idx);
+
+/**
+ * Set last_avail/used_idx of the vhost virtqueue
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * last_avail_idx to set
+ * @param last_used_idx
+ * last_used_idx to set
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_set_vring_base(int vid, uint16_t queue_id,
+ uint16_t last_avail_idx, uint16_t last_used_idx);
+
+/**
+ * Get vdpa device id for vhost device.
+ *
+ * @param vid
+ * vhost device id
+ * @return
+ * device id
+ */
+int __rte_experimental
+rte_vhost_get_vdpa_device_id(int vid);
+
#ifdef __cplusplus
}
#endif
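
Taken together, the new driver-level calls let an application bind a registered vDPA device to a vhost-user socket before starting it. A sketch under the assumption that the device address is already known; start_vdpa_socket is a hypothetical helper and error handling is trimmed:

#include <rte_vdpa.h>
#include <rte_vhost.h>

static int
start_vdpa_socket(const char *path, struct rte_vdpa_dev_addr *addr)
{
	uint64_t protocol_features;
	uint32_t queue_num;
	int did = rte_vdpa_find_device_id(addr);

	if (did < 0 || rte_vhost_driver_register(path, 0) < 0)
		return -1;

	if (rte_vhost_driver_attach_vdpa_device(path, did) < 0)
		return -1;

	/* Capabilities reported from here on reflect the vDPA device,
	 * not only the built-in software datapath. */
	rte_vhost_driver_get_protocol_features(path, &protocol_features);
	rte_vhost_driver_get_queue_num(path, &queue_num);

	return rte_vhost_driver_start(path);
}
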
diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h
new file mode 100644
index 00000000..f9fbc054
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_crypto.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef _VHOST_CRYPTO_H_
+#define _VHOST_CRYPTO_H_
+
+#define VHOST_CRYPTO_MBUF_POOL_SIZE (8192)
+#define VHOST_CRYPTO_MAX_BURST_SIZE (64)
+#define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */
+/** Max number of virtual queues in a burst for finalizing */
+#define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64)
+
+enum rte_vhost_crypto_zero_copy {
+ RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0,
+ RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE = 1,
+ RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS
+};
+
+/**
+ * Create Vhost-crypto instance
+ *
+ * @param vid
+ * The identifier of the vhost device.
+ * @param cryptodev_id
+ *  The identifier of the DPDK cryptodev; the same cryptodev_id can be assigned
+ *  to multiple Vhost-crypto devices.
+ * @param sess_pool
+ *  Pointer to the created cryptodev session pool, whose private data size
+ *  matches the target DPDK cryptodev.
+ * @param socket_id
+ *  NUMA socket ID to allocate resources on.
+ * @return
+ * 0 if the Vhost Crypto Instance is created successfully.
+ * Negative integer if otherwise
+ */
+int __rte_experimental
+rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
+ struct rte_mempool *sess_pool, int socket_id);
+
+/**
+ * Free the Vhost-crypto instance
+ *
+ * @param vid
+ * The identifier of the vhost device.
+ * @return
+ *  0 if the Vhost-crypto instance is freed successfully.
+ * Negative integer if otherwise.
+ */
+int __rte_experimental
+rte_vhost_crypto_free(int vid);
+
+/**
+ * Enable or disable zero copy feature
+ *
+ * @param vid
+ * The identifier of the vhost device.
+ * @param option
+ * Flag of zero copy feature.
+ * @return
+ * 0 if completed successfully.
+ * Negative integer if otherwise.
+ */
+int __rte_experimental
+rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option);
+
+/**
+ * Fetch a number of vring descriptors from virt-queue and translate to DPDK
+ * crypto operations. After this function is executed, the user can enqueue
+ * the processed ops to the target cryptodev.
+ *
+ * @param vid
+ * The identifier of the vhost device.
+ * @param qid
+ * Virtio queue index.
+ * @param ops
+ * The address of an array of pointers to *rte_crypto_op* structures that must
+ * be large enough to store *nb_ops* pointers in it.
+ * @param nb_ops
+ * The maximum number of operations to be fetched and translated.
+ * @return
+ * The number of fetched and processed vhost crypto request operations.
+ */
+uint16_t __rte_experimental
+rte_vhost_crypto_fetch_requests(int vid, uint32_t qid,
+ struct rte_crypto_op **ops, uint16_t nb_ops);
+/**
+ * Finalize the dequeued crypto ops. After the translated crypto ops are
+ * dequeued from the cryptodev, this function shall be called to write the
+ * processed data back to the vring descriptors (if zero copy is turned off).
+ *
+ * @param ops
+ * The address of an array of *rte_crypto_op* structure that was dequeued
+ * from cryptodev.
+ * @param nb_ops
+ * The number of operations contained in the array.
+ * @param callfds
+ *  The callfd number(s) contained in this burst; this shall be an array with
+ *  no fewer than VIRTIO_CRYPTO_MAX_NUM_BURST_VQS elements.
+ * @param nb_callfds
+ *  The number of call_fd numbers contained in callfds.
+ * @return
+ * The number of ops processed.
+ */
+uint16_t __rte_experimental
+rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops,
+ uint16_t nb_ops, int *callfds, uint16_t *nb_callfds);
+
+#endif /**< _VHOST_CRYPTO_H_ */
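
The intended data path pairs the two calls above around an ordinary cryptodev enqueue/dequeue cycle, then kicks the guest through the returned callfds. A sketch of one polling iteration, loosely modelled on the vhost_crypto sample application; vhost_crypto_poll_once and its parameters are hypothetical, and leftover-op accounting plus completion polling are omitted:

#include <sys/eventfd.h>

#include <rte_crypto.h>
#include <rte_cryptodev.h>
#include <rte_vhost_crypto.h>

static void
vhost_crypto_poll_once(int vid, uint32_t vq_id, uint8_t cdev_id,
		uint16_t cdev_qp, struct rte_mempool *cop_pool)
{
	struct rte_crypto_op *ops[VHOST_CRYPTO_MAX_BURST_SIZE];
	int callfds[VIRTIO_CRYPTO_MAX_NUM_BURST_VQS];
	uint16_t nb, nb_callfds, i;

	if (rte_crypto_op_bulk_alloc(cop_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC,
			ops, VHOST_CRYPTO_MAX_BURST_SIZE) == 0)
		return;

	/* translate guest vring requests into crypto ops... */
	nb = rte_vhost_crypto_fetch_requests(vid, vq_id, ops,
			VHOST_CRYPTO_MAX_BURST_SIZE);

	/* ...run them through the cryptodev (a real loop would poll
	 * until all enqueued ops have completed)... */
	nb = rte_cryptodev_enqueue_burst(cdev_id, cdev_qp, ops, nb);
	nb = rte_cryptodev_dequeue_burst(cdev_id, cdev_qp, ops, nb);

	/* ...write results back to the vrings and kick the guest */
	nb = rte_vhost_crypto_finalize_requests(ops, nb, callfds, &nb_callfds);

	for (i = 0; i < nb_callfds; i++)
		eventfd_write(callfds[i], 1);
}
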
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index df010312..da220dd0 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -59,3 +59,27 @@ DPDK_18.02 {
rte_vhost_vring_call;
} DPDK_17.08;
+
+EXPERIMENTAL {
+ global:
+
+ rte_vdpa_register_device;
+ rte_vdpa_unregister_device;
+ rte_vdpa_find_device_id;
+ rte_vdpa_get_device;
+ rte_vhost_driver_attach_vdpa_device;
+ rte_vhost_driver_detach_vdpa_device;
+ rte_vhost_driver_get_vdpa_device_id;
+ rte_vhost_get_vdpa_device_id;
+ rte_vhost_driver_get_protocol_features;
+ rte_vhost_driver_get_queue_num;
+ rte_vhost_get_log_base;
+ rte_vhost_get_vring_base;
+ rte_vhost_set_vring_base;
+ rte_vhost_crypto_create;
+ rte_vhost_crypto_free;
+ rte_vhost_crypto_fetch_requests;
+ rte_vhost_crypto_finalize_requests;
+ rte_vhost_crypto_set_zero_copy;
+ rte_vhost_va_from_guest_pa;
+};
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 83befdce..d6303174 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -4,7 +4,6 @@
#include <stdint.h>
#include <stdio.h>
-#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
@@ -52,6 +51,13 @@ struct vhost_user_socket {
uint64_t supported_features;
uint64_t features;
+ /*
+ * Device id to identify a specific backend device.
+ * It's set to -1 for the default software implementation.
+ * If valid, one socket can have 1 connection only.
+ */
+ int vdpa_dev_id;
+
struct vhost_device_ops const *notify_ops;
};
@@ -97,6 +103,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
size_t fdsize = fd_num * sizeof(int);
char control[CMSG_SPACE(fdsize)];
struct cmsghdr *cmsg;
+ int got_fds = 0;
int ret;
memset(&msgh, 0, sizeof(msgh));
@@ -123,11 +130,16 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
if ((cmsg->cmsg_level == SOL_SOCKET) &&
(cmsg->cmsg_type == SCM_RIGHTS)) {
- memcpy(fds, CMSG_DATA(cmsg), fdsize);
+ got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
break;
}
}
+ /* Clear out unused file descriptors */
+ while (got_fds < fd_num)
+ fds[got_fds++] = -1;
+
return ret;
}
@@ -153,6 +165,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
msgh.msg_control = control;
msgh.msg_controllen = sizeof(control);
cmsg = CMSG_FIRSTHDR(&msgh);
+ if (cmsg == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
+ errno = EINVAL;
+ return -1;
+ }
cmsg->cmsg_len = CMSG_LEN(fdsize);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
@@ -163,7 +180,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
}
do {
- ret = sendmsg(sockfd, &msgh, 0);
+ ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
} while (ret < 0 && errno == EINTR);
if (ret < 0) {
@@ -182,6 +199,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
struct vhost_user_connection *conn;
int ret;
+ if (vsocket == NULL)
+ return;
+
conn = malloc(sizeof(*conn));
if (conn == NULL) {
close(fd);
@@ -198,6 +218,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
+ vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id);
+
if (vsocket->dequeue_zero_copy)
vhost_enable_dequeue_zero_copy(vid);
@@ -232,6 +254,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
pthread_mutex_lock(&vsocket->conn_mutex);
TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
pthread_mutex_unlock(&vsocket->conn_mutex);
+
+ fdset_pipe_notify(&vhost_user.fdset);
return;
err:
@@ -318,6 +342,16 @@ vhost_user_start_server(struct vhost_user_socket *vsocket)
int fd = vsocket->socket_fd;
const char *path = vsocket->path;
+ /*
+	 * bind() may fail if a socket file with the same name already
+	 * exists. But the library obviously should not delete the file
+	 * provided by the user, since we cannot be sure that it is not
+ * being used by other applications. Moreover, many applications form
+ * socket names based on user input, which is prone to errors.
+ *
+ * The user must ensure that the socket does not exist before
+ * registering the vhost driver in server mode.
+ */
ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
@@ -436,7 +470,6 @@ static int
vhost_user_reconnect_init(void)
{
int ret;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
ret = pthread_mutex_init(&reconn_list.mutex, NULL);
if (ret < 0) {
@@ -445,7 +478,7 @@ vhost_user_reconnect_init(void)
}
TAILQ_INIT(&reconn_list.head);
- ret = pthread_create(&reconn_tid, NULL,
+ ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
vhost_user_client_reconnect, NULL);
if (ret != 0) {
RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
@@ -453,14 +486,6 @@ vhost_user_reconnect_init(void)
RTE_LOG(ERR, VHOST_CONFIG,
"failed to destroy reconnect mutex");
}
- } else {
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
- "vhost-reconn");
-
- if (rte_thread_setname(reconn_tid, thread_name)) {
- RTE_LOG(DEBUG, VHOST_CONFIG,
- "failed to set reconnect thread name");
- }
}
return ret;
@@ -524,6 +549,52 @@ find_vhost_user_socket(const char *path)
}
int
+rte_vhost_driver_attach_vdpa_device(const char *path, int did)
+{
+ struct vhost_user_socket *vsocket;
+
+ if (rte_vdpa_get_device(did) == NULL)
+ return -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+ vsocket = find_vhost_user_socket(path);
+ if (vsocket)
+ vsocket->vdpa_dev_id = did;
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return vsocket ? 0 : -1;
+}
+
+int
+rte_vhost_driver_detach_vdpa_device(const char *path)
+{
+ struct vhost_user_socket *vsocket;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+ vsocket = find_vhost_user_socket(path);
+ if (vsocket)
+ vsocket->vdpa_dev_id = -1;
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return vsocket ? 0 : -1;
+}
+
+int
+rte_vhost_driver_get_vdpa_device_id(const char *path)
+{
+ struct vhost_user_socket *vsocket;
+ int did = -1;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+ vsocket = find_vhost_user_socket(path);
+ if (vsocket)
+ did = vsocket->vdpa_dev_id;
+ pthread_mutex_unlock(&vhost_user.mutex);
+
+ return did;
+}
+
+int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
struct vhost_user_socket *vsocket;
@@ -591,19 +662,136 @@ int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
struct vhost_user_socket *vsocket;
+ uint64_t vdpa_features;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
+ int ret = 0;
pthread_mutex_lock(&vhost_user.mutex);
vsocket = find_vhost_user_socket(path);
- if (vsocket)
+ if (!vsocket) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "socket file %s is not registered yet.\n", path);
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ did = vsocket->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev || !vdpa_dev->ops->get_features) {
*features = vsocket->features;
+ goto unlock_exit;
+ }
+
+ if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to get vdpa features "
+ "for socket file %s.\n", path);
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ *features = vsocket->features & vdpa_features;
+
+unlock_exit:
pthread_mutex_unlock(&vhost_user.mutex);
+ return ret;
+}
+
+int
+rte_vhost_driver_get_protocol_features(const char *path,
+ uint64_t *protocol_features)
+{
+ struct vhost_user_socket *vsocket;
+ uint64_t vdpa_protocol_features;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
+ int ret = 0;
+ pthread_mutex_lock(&vhost_user.mutex);
+ vsocket = find_vhost_user_socket(path);
if (!vsocket) {
RTE_LOG(ERR, VHOST_CONFIG,
"socket file %s is not registered yet.\n", path);
- return -1;
- } else {
- return 0;
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ did = vsocket->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
+ *protocol_features = VHOST_USER_PROTOCOL_FEATURES;
+ goto unlock_exit;
+ }
+
+ if (vdpa_dev->ops->get_protocol_features(did,
+ &vdpa_protocol_features) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to get vdpa protocol features "
+ "for socket file %s.\n", path);
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ *protocol_features = VHOST_USER_PROTOCOL_FEATURES
+ & vdpa_protocol_features;
+
+unlock_exit:
+ pthread_mutex_unlock(&vhost_user.mutex);
+ return ret;
+}
+
+int
+rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
+{
+ struct vhost_user_socket *vsocket;
+ uint32_t vdpa_queue_num;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
+ int ret = 0;
+
+ pthread_mutex_lock(&vhost_user.mutex);
+ vsocket = find_vhost_user_socket(path);
+ if (!vsocket) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "socket file %s is not registered yet.\n", path);
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ did = vsocket->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) {
+ *queue_num = VHOST_MAX_QUEUE_PAIRS;
+ goto unlock_exit;
+ }
+
+ if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to get vdpa queue number "
+ "for socket file %s.\n", path);
+ ret = -1;
+ goto unlock_exit;
+ }
+
+ *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
+
+unlock_exit:
+ pthread_mutex_unlock(&vhost_user.mutex);
+ return ret;
+}
+
+static void
+vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
+{
+ if (vsocket && vsocket->path) {
+ free(vsocket->path);
+ vsocket->path = NULL;
+ }
+
+ if (vsocket) {
+ free(vsocket);
+ vsocket = NULL;
}
}
@@ -637,7 +825,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
if (vsocket->path == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
"error: failed to copy socket path string\n");
- free(vsocket);
+ vhost_user_socket_mem_free(vsocket);
goto out;
}
TAILQ_INIT(&vsocket->conn_list);
@@ -665,6 +853,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
+	/* Dequeue zero copy can't ensure descriptors are returned in order */
+ if (vsocket->dequeue_zero_copy) {
+ vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ }
+
if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
@@ -695,8 +889,7 @@ out_mutex:
"error: failed to destroy connection mutex\n");
}
out_free:
- free(vsocket->path);
- free(vsocket);
+ vhost_user_socket_mem_free(vsocket);
out:
pthread_mutex_unlock(&vhost_user.mutex);
@@ -743,21 +936,25 @@ rte_vhost_driver_unregister(const char *path)
struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
if (!strcmp(vsocket->path, path)) {
- if (vsocket->is_server) {
- fdset_del(&vhost_user.fdset, vsocket->socket_fd);
- close(vsocket->socket_fd);
- unlink(path);
- } else if (vsocket->reconnect) {
- vhost_user_remove_reconnect(vsocket);
- }
-
+again:
pthread_mutex_lock(&vsocket->conn_mutex);
for (conn = TAILQ_FIRST(&vsocket->conn_list);
conn != NULL;
conn = next) {
next = TAILQ_NEXT(conn, next);
- fdset_del(&vhost_user.fdset, conn->connfd);
+ /*
+ * If r/wcb is executing, release the
+ * conn_mutex lock, and try again since
+ * the r/wcb may use the conn_mutex lock.
+ */
+ if (fdset_try_del(&vhost_user.fdset,
+ conn->connfd) == -1) {
+ pthread_mutex_unlock(
+ &vsocket->conn_mutex);
+ goto again;
+ }
+
RTE_LOG(INFO, VHOST_CONFIG,
"free connfd = %d for device '%s'\n",
conn->connfd, path);
@@ -768,9 +965,17 @@ rte_vhost_driver_unregister(const char *path)
}
pthread_mutex_unlock(&vsocket->conn_mutex);
+ if (vsocket->is_server) {
+ fdset_del(&vhost_user.fdset,
+ vsocket->socket_fd);
+ close(vsocket->socket_fd);
+ unlink(path);
+ } else if (vsocket->reconnect) {
+ vhost_user_remove_reconnect(vsocket);
+ }
+
pthread_mutex_destroy(&vsocket->conn_mutex);
- free(vsocket->path);
- free(vsocket);
+ vhost_user_socket_mem_free(vsocket);
count = --vhost_user.vsocket_cnt;
vhost_user.vsockets[i] = vhost_user.vsockets[count];
@@ -829,11 +1034,26 @@ rte_vhost_driver_start(const char *path)
return -1;
if (fdset_tid == 0) {
- int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
- &vhost_user.fdset);
- if (ret != 0)
+ /**
+		 * create a pipe that poll() waits on and that is used to
+		 * notify the dispatch thread to rebuild its wait list.
+ */
+ if (fdset_pipe_init(&vhost_user.fdset) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to create pipe for vhost fdset\n");
+ return -1;
+ }
+
+ int ret = rte_ctrl_thread_create(&fdset_tid,
+ "vhost-events", NULL, fdset_event_dispatch,
+ &vhost_user.fdset);
+ if (ret != 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"failed to create fdset handling thread");
+
+ fdset_pipe_uninit(&vhost_user.fdset);
+ return -1;
+ }
}
if (vsocket->is_server)
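
The reconnect and fdset threads are now spawned with rte_ctrl_thread_create(), which creates, names and sets the affinity of a control thread in one call instead of pthread_create() plus rte_thread_setname(). A minimal sketch; spawn_ctrl_thread and worker are hypothetical names:

#include <pthread.h>
#include <rte_lcore.h>

static void *
worker(void *arg)
{
	(void)arg;
	return NULL;
}

static int
spawn_ctrl_thread(void)
{
	pthread_t tid;

	/* the name is truncated to RTE_MAX_THREAD_NAME_LEN internally */
	return rte_ctrl_thread_create(&tid, "demo-ctrl", NULL, worker, NULL);
}
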
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
new file mode 100644
index 00000000..c82fd437
--- /dev/null
+++ b/lib/librte_vhost/vdpa.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include "rte_vdpa.h"
+#include "vhost.h"
+
+static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE];
+static uint32_t vdpa_device_num;
+
+static bool
+is_same_vdpa_device(struct rte_vdpa_dev_addr *a,
+ struct rte_vdpa_dev_addr *b)
+{
+ bool ret = true;
+
+ if (a->type != b->type)
+ return false;
+
+ switch (a->type) {
+ case PCI_ADDR:
+ if (a->pci_addr.domain != b->pci_addr.domain ||
+ a->pci_addr.bus != b->pci_addr.bus ||
+ a->pci_addr.devid != b->pci_addr.devid ||
+ a->pci_addr.function != b->pci_addr.function)
+ ret = false;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int
+rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
+ struct rte_vdpa_dev_ops *ops)
+{
+ struct rte_vdpa_device *dev;
+ char device_name[MAX_VDPA_NAME_LEN];
+ int i;
+
+ if (vdpa_device_num >= MAX_VHOST_DEVICE)
+ return -1;
+
+ for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+ dev = vdpa_devices[i];
+ if (dev && is_same_vdpa_device(&dev->addr, addr))
+ return -1;
+ }
+
+ for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+ if (vdpa_devices[i] == NULL)
+ break;
+ }
+
+ sprintf(device_name, "vdpa-dev-%d", i);
+ dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device),
+ RTE_CACHE_LINE_SIZE);
+ if (!dev)
+ return -1;
+
+ memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));
+ dev->ops = ops;
+ vdpa_devices[i] = dev;
+ vdpa_device_num++;
+
+ return i;
+}
+
+int
+rte_vdpa_unregister_device(int did)
+{
+ if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL)
+ return -1;
+
+ rte_free(vdpa_devices[did]);
+ vdpa_devices[did] = NULL;
+ vdpa_device_num--;
+
+ return did;
+}
+
+int
+rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
+{
+ struct rte_vdpa_device *dev;
+ int i;
+
+ for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
+ dev = vdpa_devices[i];
+ if (dev && is_same_vdpa_device(&dev->addr, addr))
+ return i;
+ }
+
+ return -1;
+}
+
+struct rte_vdpa_device *
+rte_vdpa_get_device(int did)
+{
+ if (did < 0 || did >= MAX_VHOST_DEVICE)
+ return NULL;
+
+ return vdpa_devices[did];
+}
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a407067e..3c9be10a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2016 Intel Corporation
+ * Copyright(c) 2010-2017 Intel Corporation
*/
#include <linux/vhost.h>
@@ -29,17 +29,17 @@ struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/* Called with iotlb_lock read-locked */
uint64_t
__vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t iova, uint64_t size, uint8_t perm)
+ uint64_t iova, uint64_t *size, uint8_t perm)
{
uint64_t vva, tmp_size;
- if (unlikely(!size))
+ if (unlikely(!*size))
return 0;
- tmp_size = size;
+ tmp_size = *size;
vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
- if (tmp_size == size)
+ if (tmp_size == *size)
return vva;
iova += tmp_size;
@@ -68,19 +68,6 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
}
-struct virtio_net *
-get_device(int vid)
-{
- struct virtio_net *dev = vhost_devices[vid];
-
- if (unlikely(!dev)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) device not found.\n", vid);
- }
-
- return dev;
-}
-
void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
@@ -106,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy)
}
void
-free_vq(struct vhost_virtqueue *vq)
+free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- rte_free(vq->shadow_used_ring);
+ if (vq_is_packed(dev))
+ rte_free(vq->shadow_used_packed);
+ else
+ rte_free(vq->shadow_used_split);
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
@@ -123,42 +113,95 @@ free_device(struct virtio_net *dev)
uint32_t i;
for (i = 0; i < dev->nr_vring; i++)
- free_vq(dev->virtqueue[i]);
+ free_vq(dev, dev->virtqueue[i]);
rte_free(dev);
}
-int
-vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+static int
+vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- uint64_t size;
+ uint64_t req_size, size;
- if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
- goto out;
-
- size = sizeof(struct vring_desc) * vq->size;
+ req_size = sizeof(struct vring_desc) * vq->size;
+ size = req_size;
vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
vq->ring_addrs.desc_user_addr,
- size, VHOST_ACCESS_RW);
- if (!vq->desc)
+ &size, VHOST_ACCESS_RW);
+ if (!vq->desc || size != req_size)
return -1;
- size = sizeof(struct vring_avail);
- size += sizeof(uint16_t) * vq->size;
+ req_size = sizeof(struct vring_avail);
+ req_size += sizeof(uint16_t) * vq->size;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+ req_size += sizeof(uint16_t);
+ size = req_size;
vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
vq->ring_addrs.avail_user_addr,
- size, VHOST_ACCESS_RW);
- if (!vq->avail)
+ &size, VHOST_ACCESS_RW);
+ if (!vq->avail || size != req_size)
return -1;
- size = sizeof(struct vring_used);
- size += sizeof(struct vring_used_elem) * vq->size;
+ req_size = sizeof(struct vring_used);
+ req_size += sizeof(struct vring_used_elem) * vq->size;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+ req_size += sizeof(uint16_t);
+ size = req_size;
vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
vq->ring_addrs.used_user_addr,
- size, VHOST_ACCESS_RW);
- if (!vq->used)
+ &size, VHOST_ACCESS_RW);
+ if (!vq->used || size != req_size)
+ return -1;
+
+ return 0;
+}
+
+static int
+vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint64_t req_size, size;
+
+ req_size = sizeof(struct vring_packed_desc) * vq->size;
+ size = req_size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->desc_packed || size != req_size)
return -1;
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->driver_event || size != req_size)
+ return -1;
+
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->device_event || size != req_size)
+ return -1;
+
+ return 0;
+}
+
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+
+ if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ goto out;
+
+ if (vq_is_packed(dev)) {
+ if (vring_translate_packed(dev, vq) < 0)
+ return -1;
+ } else {
+ if (vring_translate_split(dev, vq) < 0)
+ return -1;
+ }
out:
vq->access_ok = 1;
@@ -240,6 +283,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
dev->virtqueue[vring_idx] = vq;
init_vring_queue(dev, vring_idx);
rte_spinlock_init(&vq->access_lock);
+ vq->avail_wrap_counter = 1;
+ vq->used_wrap_counter = 1;
+ vq->signalled_used_valid = false;
dev->nr_vring += 1;
@@ -274,21 +320,21 @@ vhost_new_device(void)
struct virtio_net *dev;
int i;
- dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
- if (dev == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to allocate memory for new dev.\n");
- return -1;
- }
-
for (i = 0; i < MAX_VHOST_DEVICE; i++) {
if (vhost_devices[i] == NULL)
break;
}
+
if (i == MAX_VHOST_DEVICE) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to find a free slot for new device.\n");
- rte_free(dev);
+ return -1;
+ }
+
+ dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+ if (dev == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to allocate memory for new dev.\n");
return -1;
}
@@ -296,10 +342,28 @@ vhost_new_device(void)
dev->vid = i;
dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
dev->slave_req_fd = -1;
+ dev->vdpa_dev_id = -1;
+ rte_spinlock_init(&dev->slave_req_lock);
return i;
}
+void
+vhost_destroy_device_notify(struct virtio_net *dev)
+{
+ struct rte_vdpa_device *vdpa_dev;
+ int did;
+
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->dev_close)
+ vdpa_dev->ops->dev_close(dev->vid);
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+ dev->notify_ops->destroy_device(dev->vid);
+ }
+}
+
/*
 * Invoked when the vhost-user connection is broken (when
* the virtio device is being detached).
@@ -312,10 +376,7 @@ vhost_destroy_device(int vid)
if (dev == NULL)
return;
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 1);
free_device(dev);
@@ -324,6 +385,33 @@ vhost_destroy_device(int vid)
}
void
+vhost_attach_vdpa_device(int vid, int did)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return;
+
+ if (rte_vdpa_get_device(did) == NULL)
+ return;
+
+ dev->vdpa_dev_id = did;
+}
+
+void
+vhost_detach_vdpa_device(int vid)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return;
+
+ vhost_user_host_notifier_ctrl(vid, false);
+
+ dev->vdpa_dev_id = -1;
+}
+
+void
vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
{
struct virtio_net *dev;
@@ -532,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;
- vhost_vring_call(dev, vq);
+ if (vq_is_packed(dev))
+ vhost_vring_call_packed(dev, vq);
+ else
+ vhost_vring_call_split(dev, vq);
+
return 0;
}
@@ -553,21 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
}
+static inline void
+vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable)
+{
+ if (enable)
+ vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ else
+ vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+static inline void
+vhost_enable_notify_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq, int enable)
+{
+ uint16_t flags;
+
+ if (!enable)
+ vq->device_event->flags = VRING_EVENT_F_DISABLE;
+
+ flags = VRING_EVENT_F_ENABLE;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
+ flags = VRING_EVENT_F_DESC;
+ vq->device_event->off_wrap = vq->last_avail_idx |
+ vq->avail_wrap_counter << 15;
+ }
+
+ rte_smp_wmb();
+
+ vq->device_event->flags = flags;
+}
+
int
rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
struct virtio_net *dev = get_device(vid);
+ struct vhost_virtqueue *vq;
- if (dev == NULL)
+ if (!dev)
return -1;
- if (enable) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "guest notification isn't supported.\n");
- return -1;
- }
+ vq = dev->virtqueue[queue_id];
+
+ if (vq_is_packed(dev))
+ vhost_enable_notify_packed(dev, vq, enable);
+ else
+ vhost_enable_notify_split(vq, enable);
- dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
return 0;
}
@@ -627,3 +750,76 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid)
return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
}
+
+int rte_vhost_get_vdpa_device_id(int vid)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (dev == NULL)
+ return -1;
+
+ return dev->vdpa_dev_id;
+}
+
+int rte_vhost_get_log_base(int vid, uint64_t *log_base,
+ uint64_t *log_size)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return -1;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return -1;
+ }
+
+ *log_base = dev->log_base;
+ *log_size = dev->log_size;
+
+ return 0;
+}
+
+int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
+ uint16_t *last_avail_idx, uint16_t *last_used_idx)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return -1;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return -1;
+ }
+
+ *last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
+ *last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
+
+ return 0;
+}
+
+int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
+ uint16_t last_avail_idx, uint16_t last_used_idx)
+{
+ struct virtio_net *dev = get_device(vid);
+
+ if (!dev)
+ return -1;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return -1;
+ }
+
+ dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
+ dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
+
+ return 0;
+}
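
rte_vhost_get_vring_base() and rte_vhost_set_vring_base() exist mainly so a vDPA driver can transfer virtqueue state to and from the software datapath, for example around live migration. A hedged sketch; save_restore_vring_state is a hypothetical helper:

#include <stdint.h>
#include <rte_vhost.h>

static int
save_restore_vring_state(int vid, uint16_t queue_id)
{
	uint16_t last_avail_idx, last_used_idx;

	/* snapshot the current ring positions... */
	if (rte_vhost_get_vring_base(vid, queue_id,
			&last_avail_idx, &last_used_idx) < 0)
		return -1;

	/* ...device/vDPA specific state transfer would happen here... */

	/* ...then restore them before resuming the datapath */
	return rte_vhost_set_vring_base(vid, queue_id,
			last_avail_idx, last_used_idx);
}
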
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d947bc9e..760a09c0 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -1,11 +1,12 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
#ifndef _VHOST_NET_CDEV_H_
#define _VHOST_NET_CDEV_H_
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <unistd.h>
@@ -19,6 +20,7 @@
#include <rte_rwlock.h>
#include "rte_vhost.h"
+#include "rte_vdpa.h"
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
@@ -26,17 +28,22 @@
#define VIRTIO_DEV_READY 2
/* Used to indicate that the built-in vhost net device backend is enabled */
#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4
+/* Used to indicate that the device has its own data path and is configured */
+#define VIRTIO_DEV_VDPA_CONFIGURED 8
/* Backend value set by guest. */
#define VIRTIO_DEV_STOPPED -1
#define BUF_VECTOR_MAX 256
+#define VHOST_LOG_CACHE_NR 32
+
/**
* Structure contains buffer address, length and descriptor index
* from vring to do scatter RX.
*/
struct buf_vector {
+ uint64_t buf_iova;
uint64_t buf_addr;
uint32_t buf_len;
uint32_t desc_idx;
@@ -49,6 +56,7 @@ struct buf_vector {
struct zcopy_mbuf {
struct rte_mbuf *mbuf;
uint32_t desc_idx;
+ uint16_t desc_count;
uint16_t in_use;
TAILQ_ENTRY(zcopy_mbuf) next;
@@ -65,19 +73,43 @@ struct batch_copy_elem {
uint64_t log_addr;
};
+/*
+ * Structure that contains the info for batched dirty logging.
+ */
+struct log_cache_entry {
+ uint32_t offset;
+ unsigned long val;
+};
+
+struct vring_used_elem_packed {
+ uint16_t id;
+ uint32_t len;
+ uint32_t count;
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
struct vhost_virtqueue {
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+ union {
+ struct vring_desc *desc;
+ struct vring_packed_desc *desc_packed;
+ };
+ union {
+ struct vring_avail *avail;
+ struct vring_packed_desc_event *driver_event;
+ };
+ union {
+ struct vring_used *used;
+ struct vring_packed_desc_event *device_event;
+ };
uint32_t size;
uint16_t last_avail_idx;
uint16_t last_used_idx;
/* Last used index we notify to front end. */
uint16_t signalled_used;
+ bool signalled_used_valid;
#define VIRTIO_INVALID_EVENTFD (-1)
#define VIRTIO_UNINITIALIZED_EVENTFD (-2)
@@ -101,12 +133,20 @@ struct vhost_virtqueue {
struct zcopy_mbuf *zmbufs;
struct zcopy_mbuf_list zmbuf_list;
- struct vring_used_elem *shadow_used_ring;
+ union {
+ struct vring_used_elem *shadow_used_split;
+ struct vring_used_elem_packed *shadow_used_packed;
+ };
uint16_t shadow_used_idx;
struct vhost_vring_addr ring_addrs;
struct batch_copy_elem *batch_copy_elems;
uint16_t batch_copy_nb_elems;
+ bool used_wrap_counter;
+ bool avail_wrap_counter;
+
+ struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+ uint16_t log_cache_nb_elem;
rte_rwlock_t iotlb_lock;
rte_rwlock_t iotlb_pending_lock;
@@ -174,7 +214,41 @@ struct vhost_msg {
#define VIRTIO_F_VERSION_1 32
#endif
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
+/* Declare packed ring related bits for older kernels */
+#ifndef VIRTIO_F_RING_PACKED
+
+#define VIRTIO_F_RING_PACKED 34
+
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
+#define VRING_DESC_F_AVAIL (1ULL << 7)
+#define VRING_DESC_F_USED (1ULL << 15)
+
+struct vring_packed_desc {
+ uint64_t addr;
+ uint32_t len;
+ uint16_t id;
+ uint16_t flags;
+};
+
+#define VRING_EVENT_F_ENABLE 0x0
+#define VRING_EVENT_F_DISABLE 0x1
+#define VRING_EVENT_F_DESC 0x2
+
+struct vring_packed_desc_event {
+ uint16_t off_wrap;
+ uint16_t flags;
+};
+#endif
+
+/*
+ * Available and used descs are in the same order
+ */
+#ifndef VIRTIO_F_IN_ORDER
+#define VIRTIO_F_IN_ORDER 35
+#endif
/* Features supported by this builtin vhost-user net driver. */
#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
@@ -199,7 +273,8 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
- (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_F_IN_ORDER) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -210,6 +285,51 @@ struct guest_page {
};
/**
+ * Function prototype for the vhost backend to handle specific vhost user
+ * messages prior to the master message handling
+ *
+ * @param vid
+ * vhost device id
+ * @param msg
+ * Message pointer.
+ * @param require_reply
+ *  If the handler requires sending a reply, this variable shall be written 1,
+ * otherwise 0.
+ * @param skip_master
+ * If the handler requires skipping the master message handling, this variable
+ * shall be written 1, otherwise 0.
+ * @return
+ * 0 on success, -1 on failure
+ */
+typedef int (*vhost_msg_pre_handle)(int vid, void *msg,
+ uint32_t *require_reply, uint32_t *skip_master);
+
+/**
+ * Function prototype for the vhost backend to handle specific vhost user
+ * messages after the master message handling is done
+ *
+ * @param vid
+ * vhost device id
+ * @param msg
+ * Message pointer.
+ * @param require_reply
+ *  If the handler requires sending a reply, this variable shall be written 1,
+ * otherwise 0.
+ * @return
+ * 0 on success, -1 on failure
+ */
+typedef int (*vhost_msg_post_handle)(int vid, void *msg,
+ uint32_t *require_reply);
+
+/**
+ * pre and post vhost user message handlers
+ */
+struct vhost_user_extern_ops {
+ vhost_msg_pre_handle pre_msg_handle;
+ vhost_msg_post_handle post_msg_handle;
+};
+
+/**
* Device structure contains all configuration information relating
* to the device.
*/
@@ -241,8 +361,32 @@ struct virtio_net {
struct guest_page *guest_pages;
int slave_req_fd;
+ rte_spinlock_t slave_req_lock;
+
+ /*
+ * Device id to identify a specific backend device.
+ * It's set to -1 for the default software implementation.
+ */
+ int vdpa_dev_id;
+
+ /* private data for virtio device */
+ void *extern_data;
+ /* pre and post vhost user message handlers for the device */
+ struct vhost_user_extern_ops extern_ops;
} __rte_cache_aligned;
+static __rte_always_inline bool
+vq_is_packed(struct virtio_net *dev)
+{
+ return dev->features & (1ull << VIRTIO_F_RING_PACKED);
+}
+
+static inline bool
+desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
+{
+ return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
+ wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+}
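
In the packed ring layout a descriptor is available when its AVAIL flag equals the current wrap counter and its USED flag differs from it. A small standalone restatement of the test above; packed_desc_avail is a hypothetical helper for illustration only:

#include <stdbool.h>
#include <stdint.h>

static bool
packed_desc_avail(uint16_t flags, bool wrap_counter)
{
	bool avail = !!(flags & (1 << 7));	/* VRING_DESC_F_AVAIL */
	bool used  = !!(flags & (1 << 15));	/* VRING_DESC_F_USED */

	/* first lap: the driver writes AVAIL=1/USED=0 with wrap_counter == 1;
	 * once the device marks the descriptor used (USED=1) the test fails,
	 * and on the next lap the driver flips its wrap counter and writes
	 * AVAIL=0/USED=1, so the same test keeps working. */
	return avail == wrap_counter && used != wrap_counter;
}
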
#define VHOST_LOG_PAGE 4096
@@ -252,7 +396,15 @@ struct virtio_net {
static __rte_always_inline void
vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
{
- __sync_fetch_and_or_8(addr, (1U << nr));
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * __sync_ built-ins are deprecated, but __atomic_ ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or_1(addr, (1U << nr));
+#else
+ __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
}
static __rte_always_inline void
@@ -284,6 +436,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
}
static __rte_always_inline void
+vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ unsigned long *log_base;
+ int i;
+
+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+ !dev->log_base))
+ return;
+
+ log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+ /*
+ * It is expected a write memory barrier has been issued
+ * before this function is called.
+ */
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * '__sync' builtins are deprecated, but '__atomic' ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+ __atomic_fetch_or(log_base + elem->offset, elem->val,
+ __ATOMIC_RELAXED);
+#endif
+ }
+
+ rte_smp_wmb();
+
+ vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t page)
+{
+ uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+ uint32_t offset = page / (sizeof(unsigned long) << 3);
+ int i;
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+ if (elem->offset == offset) {
+ elem->val |= (1UL << bit_nr);
+ return;
+ }
+ }
+
+ if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+ /*
+ * No more room for a new log cache entry,
+ * so write the dirty log map directly.
+ */
+ rte_smp_wmb();
+ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+ return;
+ }
+
+ vq->log_cache[i].offset = offset;
+ vq->log_cache[i].val = (1UL << bit_nr);
+ vq->log_cache_nb_elem++;
+}
+
+static __rte_always_inline void
+vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t addr, uint64_t len)
+{
+ uint64_t page;
+
+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+ !dev->log_base || !len))
+ return;
+
+ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+ return;
+
+ page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len) {
+ vhost_log_cache_page(dev, vq, page);
+ page += 1;
+ }
+}
+
+static __rte_always_inline void
+vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t offset, uint64_t len)
+{
+ vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len);
+}
+
+static __rte_always_inline void
vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t offset, uint64_t len)
{
@@ -296,8 +545,8 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
#ifdef RTE_LIBRTE_VHOST_DEBUG
#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#define VHOST_LOG_DEBUG(log_type, fmt, args...) \
+ RTE_LOG(DEBUG, log_type, fmt, ##args)
#define PRINT_PACKET(device, addr, size, header) do { \
char *pkt_addr = (char *)(addr); \
unsigned int index; \
@@ -313,11 +562,10 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
} \
snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
\
- LOG_DEBUG(VHOST_DATA, "%s", packet); \
+ VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \
} while (0)
#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#define VHOST_LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif
@@ -345,18 +593,33 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
return 0;
}
-struct virtio_net *get_device(int vid);
+static __rte_always_inline struct virtio_net *
+get_device(int vid)
+{
+ struct virtio_net *dev = vhost_devices[vid];
+
+ if (unlikely(!dev)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) device not found.\n", vid);
+ }
+
+ return dev;
+}
int vhost_new_device(void);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int);
+void vhost_destroy_device_notify(struct virtio_net *dev);
void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
-void free_vq(struct vhost_virtqueue *vq);
+void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
+void vhost_attach_vdpa_device(int vid, int did);
+void vhost_detach_vdpa_device(int vid);
+
void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
void vhost_enable_dequeue_zero_copy(int vid);
void vhost_set_builtin_virtio_net(int vid, bool enable);
@@ -371,18 +634,18 @@ struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
void vhost_backend_cleanup(struct virtio_net *dev);
uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t iova, uint64_t size, uint8_t perm);
+ uint64_t iova, uint64_t *len, uint8_t perm);
int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
static __rte_always_inline uint64_t
vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t iova, uint64_t size, uint8_t perm)
+ uint64_t iova, uint64_t *len, uint8_t perm)
{
if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
- return rte_vhost_gpa_to_vva(dev->mem, iova);
+ return rte_vhost_va_from_guest_pa(dev->mem, iova, len);
- return __vhost_iova_to_vva(dev, vq, iova, size, perm);
+ return __vhost_iova_to_vva(dev, vq, iova, len, perm);
}
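
Note the changed contract: the length is now passed by pointer, and on return it holds how much of the requested range is contiguously mapped, so callers must compare it against what they asked for. A small caller-side sketch of that pattern follows, using a toy translator with the same in/out length convention; the names are placeholders, not library API.

	#include <stdint.h>
	#include <stddef.h>
	#include <stdio.h>

	static uint8_t backing[8192];

	/* Toy translator with the same in/out length contract: *len is
	 * clamped to the contiguously mapped size on return.
	 */
	static void *
	translate(uint64_t iova, uint64_t *len)
	{
		if (iova >= sizeof(backing))
			return NULL;
		if (*len > sizeof(backing) - iova)
			*len = sizeof(backing) - iova;
		return backing + iova;
	}

	/* Map 'want' bytes at 'iova'; fail unless the whole range is contiguous. */
	static void *
	map_contiguous(uint64_t iova, uint64_t want)
	{
		uint64_t len = want;
		void *va = translate(iova, &len);

		if (va == NULL || len != want)
			return NULL;
		return va;
	}

	int
	main(void)
	{
		printf("%d %d\n", map_contiguous(0, 4096) != NULL,
				map_contiguous(4096, 8192) != NULL); /* prints: 1 0 */
		return 0;
	}
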
#define vhost_used_event(vr) \
@@ -401,17 +664,17 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
}
static __rte_always_inline void
-vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
+vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
/* Flush used->idx update before we read avail->flags. */
- rte_mb();
+ rte_smp_mb();
/* Don't kick guest if we don't reach index specified by guest. */
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
uint16_t old = vq->signalled_used;
uint16_t new = vq->last_used_idx;
- LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n",
+ VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n",
__func__,
vhost_used_event(vq),
old, new);
@@ -428,4 +691,55 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
}
}
+static __rte_always_inline void
+vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint16_t old, new, off, off_wrap;
+ bool signalled_used_valid, kick = false;
+
+ /* Flush used desc update. */
+ rte_smp_mb();
+
+ if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
+ if (vq->driver_event->flags !=
+ VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ old = vq->signalled_used;
+ new = vq->last_used_idx;
+ vq->signalled_used = new;
+ signalled_used_valid = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+
+ if (vq->driver_event->flags != VRING_EVENT_F_DESC) {
+ if (vq->driver_event->flags != VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ if (unlikely(!signalled_used_valid)) {
+ kick = true;
+ goto kick;
+ }
+
+ rte_smp_rmb();
+
+ off_wrap = vq->driver_event->off_wrap;
+ off = off_wrap & ~(1 << 15);
+
+ if (new <= old)
+ old -= vq->size;
+
+ if (vq->used_wrap_counter != off_wrap >> 15)
+ off -= vq->size;
+
+ if (vhost_need_event(off, new, old))
+ kick = true;
+kick:
+ if (kick)
+ eventfd_write(vq->callfd, (eventfd_t)1);
+}
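
vhost_need_event() applies the virtio event-index rule: a notification is needed when the index the driver asked to be woken at falls inside the window of entries consumed since the last kick, evaluated with 16-bit wrap-around arithmetic. A standalone sketch of that rule, reimplemented here only for illustration:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Virtio event-index rule, with 16-bit wrap-around (illustrative copy). */
	static bool
	need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
	{
		return (uint16_t)(new_idx - event_idx - 1) <
			(uint16_t)(new_idx - old_idx);
	}

	int
	main(void)
	{
		/* Consumed used indexes 10..19; driver asked to be woken at 15. */
		printf("%d\n", need_event(15, 20, 10)); /* 1: kick needed */
		/* Driver asked for index 25, which has not been reached yet. */
		printf("%d\n", need_event(25, 20, 10)); /* 0: no kick */
		return 0;
	}
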
+
#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
new file mode 100644
index 00000000..57341ef8
--- /dev/null
+++ b/lib/librte_vhost/vhost_crypto.c
@@ -0,0 +1,1372 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+#include <rte_malloc.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_mbuf.h>
+#include <rte_cryptodev.h>
+
+#include "rte_vhost_crypto.h"
+#include "vhost.h"
+#include "vhost_user.h"
+#include "virtio_crypto.h"
+
+#define INHDR_LEN (sizeof(struct virtio_crypto_inhdr))
+#define IV_OFFSET (sizeof(struct rte_crypto_op) + \
+ sizeof(struct rte_crypto_sym_op))
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+#define VC_LOG_ERR(fmt, args...) \
+ RTE_LOG(ERR, USER1, "[%s] %s() line %u: " fmt "\n", \
+ "Vhost-Crypto", __func__, __LINE__, ## args)
+#define VC_LOG_INFO(fmt, args...) \
+ RTE_LOG(INFO, USER1, "[%s] %s() line %u: " fmt "\n", \
+ "Vhost-Crypto", __func__, __LINE__, ## args)
+
+#define VC_LOG_DBG(fmt, args...) \
+ RTE_LOG(DEBUG, USER1, "[%s] %s() line %u: " fmt "\n", \
+ "Vhost-Crypto", __func__, __LINE__, ## args)
+#else
+#define VC_LOG_ERR(fmt, args...) \
+ RTE_LOG(ERR, USER1, "[VHOST-Crypto]: " fmt "\n", ## args)
+#define VC_LOG_INFO(fmt, args...) \
+ RTE_LOG(INFO, USER1, "[VHOST-Crypto]: " fmt "\n", ## args)
+#define VC_LOG_DBG(fmt, args...)
+#endif
+
+#define VIRTIO_CRYPTO_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+ (1 << VIRTIO_RING_F_INDIRECT_DESC) | \
+ (1 << VIRTIO_RING_F_EVENT_IDX) | \
+ (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \
+ (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \
+ (1 << VIRTIO_NET_F_CTRL_VQ))
+
+#define IOVA_TO_VVA(t, r, a, l, p) \
+ ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p))
+
+static int
+cipher_algo_transform(uint32_t virtio_cipher_algo)
+{
+ int ret;
+
+ switch (virtio_cipher_algo) {
+ case VIRTIO_CRYPTO_CIPHER_AES_CBC:
+ ret = RTE_CRYPTO_CIPHER_AES_CBC;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_AES_CTR:
+ ret = RTE_CRYPTO_CIPHER_AES_CTR;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_DES_ECB:
+ ret = -VIRTIO_CRYPTO_NOTSUPP;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_DES_CBC:
+ ret = RTE_CRYPTO_CIPHER_DES_CBC;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
+ ret = RTE_CRYPTO_CIPHER_3DES_ECB;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
+ ret = RTE_CRYPTO_CIPHER_3DES_CBC;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
+ ret = RTE_CRYPTO_CIPHER_3DES_CTR;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_KASUMI_F8:
+ ret = RTE_CRYPTO_CIPHER_KASUMI_F8;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2:
+ ret = RTE_CRYPTO_CIPHER_SNOW3G_UEA2;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_AES_F8:
+ ret = RTE_CRYPTO_CIPHER_AES_F8;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_AES_XTS:
+ ret = RTE_CRYPTO_CIPHER_AES_XTS;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_ZUC_EEA3:
+ ret = RTE_CRYPTO_CIPHER_ZUC_EEA3;
+ break;
+ default:
+ ret = -VIRTIO_CRYPTO_BADMSG;
+ break;
+ }
+
+ return ret;
+}
+
+static int
+auth_algo_transform(uint32_t virtio_auth_algo)
+{
+ int ret;
+
+ switch (virtio_auth_algo) {
+
+ case VIRTIO_CRYPTO_NO_MAC:
+ ret = RTE_CRYPTO_AUTH_NULL;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_MD5:
+ ret = RTE_CRYPTO_AUTH_MD5_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_SHA1:
+ ret = RTE_CRYPTO_AUTH_SHA1_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_SHA_224:
+ ret = RTE_CRYPTO_AUTH_SHA224_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_SHA_256:
+ ret = RTE_CRYPTO_AUTH_SHA256_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_SHA_384:
+ ret = RTE_CRYPTO_AUTH_SHA384_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_HMAC_SHA_512:
+ ret = RTE_CRYPTO_AUTH_SHA512_HMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_CMAC_3DES:
+ ret = -VIRTIO_CRYPTO_NOTSUPP;
+ break;
+ case VIRTIO_CRYPTO_MAC_CMAC_AES:
+ ret = RTE_CRYPTO_AUTH_AES_CMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_KASUMI_F9:
+ ret = RTE_CRYPTO_AUTH_KASUMI_F9;
+ break;
+ case VIRTIO_CRYPTO_MAC_SNOW3G_UIA2:
+ ret = RTE_CRYPTO_AUTH_SNOW3G_UIA2;
+ break;
+ case VIRTIO_CRYPTO_MAC_GMAC_AES:
+ ret = RTE_CRYPTO_AUTH_AES_GMAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_GMAC_TWOFISH:
+ ret = -VIRTIO_CRYPTO_NOTSUPP;
+ break;
+ case VIRTIO_CRYPTO_MAC_CBCMAC_AES:
+ ret = RTE_CRYPTO_AUTH_AES_CBC_MAC;
+ break;
+ case VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9:
+ ret = -VIRTIO_CRYPTO_NOTSUPP;
+ break;
+ case VIRTIO_CRYPTO_MAC_XCBC_AES:
+ ret = RTE_CRYPTO_AUTH_AES_XCBC_MAC;
+ break;
+ default:
+ ret = -VIRTIO_CRYPTO_BADMSG;
+ break;
+ }
+
+ return ret;
+}
+
+static int get_iv_len(enum rte_crypto_cipher_algorithm algo)
+{
+ int len;
+
+ switch (algo) {
+ case RTE_CRYPTO_CIPHER_3DES_CBC:
+ len = 8;
+ break;
+ case RTE_CRYPTO_CIPHER_3DES_CTR:
+ len = 8;
+ break;
+ case RTE_CRYPTO_CIPHER_3DES_ECB:
+ len = 8;
+ break;
+ case RTE_CRYPTO_CIPHER_AES_CBC:
+ len = 16;
+ break;
+
+ /* TODO: add common algos */
+
+ default:
+ len = -1;
+ break;
+ }
+
+ return len;
+}
+
+/**
+ * The vhost_crypto struct maintains a number of virtio_crypto devices and
+ * the one DPDK crypto device that handles all their crypto workloads. It is
+ * defined and used only within vhost_crypto.c.
+ */
+struct vhost_crypto {
+ /** Used to lookup DPDK Cryptodev Session based on VIRTIO crypto
+ * session ID.
+ */
+ struct rte_hash *session_map;
+ struct rte_mempool *mbuf_pool;
+ struct rte_mempool *sess_pool;
+
+ /** DPDK cryptodev ID */
+ uint8_t cid;
+ uint16_t nb_qps;
+
+ uint64_t last_session_id;
+
+ uint64_t cache_session_id;
+ struct rte_cryptodev_sym_session *cache_session;
+ /** socket id for the device */
+ int socket_id;
+
+ struct virtio_net *dev;
+
+ uint8_t option;
+} __rte_cache_aligned;
+
+struct vhost_crypto_data_req {
+ struct vring_desc *head;
+ struct virtio_net *dev;
+ struct virtio_crypto_inhdr *inhdr;
+ struct vhost_virtqueue *vq;
+ struct vring_desc *wb_desc;
+ uint16_t wb_len;
+ uint16_t desc_idx;
+ uint16_t len;
+ uint16_t zero_copy;
+};
+
+static int
+transform_cipher_param(struct rte_crypto_sym_xform *xform,
+ VhostUserCryptoSessionParam *param)
+{
+ int ret;
+
+ ret = cipher_algo_transform(param->cipher_algo);
+ if (unlikely(ret < 0))
+ return ret;
+
+ xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ xform->cipher.algo = (uint32_t)ret;
+ xform->cipher.key.length = param->cipher_key_len;
+ if (xform->cipher.key.length > 0)
+ xform->cipher.key.data = param->cipher_key_buf;
+ if (param->dir == VIRTIO_CRYPTO_OP_ENCRYPT)
+ xform->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+ else if (param->dir == VIRTIO_CRYPTO_OP_DECRYPT)
+ xform->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+ else {
+ VC_LOG_DBG("Bad operation type");
+ return -VIRTIO_CRYPTO_BADMSG;
+ }
+
+ ret = get_iv_len(xform->cipher.algo);
+ if (unlikely(ret < 0))
+ return ret;
+ xform->cipher.iv.length = (uint16_t)ret;
+ xform->cipher.iv.offset = IV_OFFSET;
+ return 0;
+}
+
+static int
+transform_chain_param(struct rte_crypto_sym_xform *xforms,
+ VhostUserCryptoSessionParam *param)
+{
+ struct rte_crypto_sym_xform *xform_cipher, *xform_auth;
+ int ret;
+
+ switch (param->chaining_dir) {
+ case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER:
+ xform_auth = xforms;
+ xform_cipher = xforms->next;
+ xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+ xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_VERIFY;
+ break;
+ case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH:
+ xform_cipher = xforms;
+ xform_auth = xforms->next;
+ xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+ xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
+ break;
+ default:
+ return -VIRTIO_CRYPTO_BADMSG;
+ }
+
+ /* cipher */
+ ret = cipher_algo_transform(param->cipher_algo);
+ if (unlikely(ret < 0))
+ return ret;
+ xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ xform_cipher->cipher.algo = (uint32_t)ret;
+ xform_cipher->cipher.key.length = param->cipher_key_len;
+ xform_cipher->cipher.key.data = param->cipher_key_buf;
+ ret = get_iv_len(xform_cipher->cipher.algo);
+ if (unlikely(ret < 0))
+ return ret;
+ xform_cipher->cipher.iv.length = (uint16_t)ret;
+ xform_cipher->cipher.iv.offset = IV_OFFSET;
+
+ /* auth */
+ xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+ ret = auth_algo_transform(param->hash_algo);
+ if (unlikely(ret < 0))
+ return ret;
+ xform_auth->auth.algo = (uint32_t)ret;
+ xform_auth->auth.digest_length = param->digest_len;
+ xform_auth->auth.key.length = param->auth_key_len;
+ xform_auth->auth.key.data = param->auth_key_buf;
+
+ return 0;
+}
+
+static void
+vhost_crypto_create_sess(struct vhost_crypto *vcrypto,
+ VhostUserCryptoSessionParam *sess_param)
+{
+ struct rte_crypto_sym_xform xform1 = {0}, xform2 = {0};
+ struct rte_cryptodev_sym_session *session;
+ int ret;
+
+ switch (sess_param->op_type) {
+ case VIRTIO_CRYPTO_SYM_OP_NONE:
+ case VIRTIO_CRYPTO_SYM_OP_CIPHER:
+ ret = transform_cipher_param(&xform1, sess_param);
+ if (unlikely(ret)) {
+ VC_LOG_ERR("Error transform session msg (%i)", ret);
+ sess_param->session_id = ret;
+ return;
+ }
+ break;
+ case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
+ if (unlikely(sess_param->hash_mode !=
+ VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH)) {
+ sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP;
+ VC_LOG_ERR("Error transform session message (%i)",
+ -VIRTIO_CRYPTO_NOTSUPP);
+ return;
+ }
+
+ xform1.next = &xform2;
+
+ ret = transform_chain_param(&xform1, sess_param);
+ if (unlikely(ret)) {
+ VC_LOG_ERR("Error transform session message (%i)", ret);
+ sess_param->session_id = ret;
+ return;
+ }
+
+ break;
+ default:
+ VC_LOG_ERR("Algorithm not yet supported");
+ sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP;
+ return;
+ }
+
+ session = rte_cryptodev_sym_session_create(vcrypto->sess_pool);
+ if (!session) {
+ VC_LOG_ERR("Failed to create session");
+ sess_param->session_id = -VIRTIO_CRYPTO_ERR;
+ return;
+ }
+
+ if (rte_cryptodev_sym_session_init(vcrypto->cid, session, &xform1,
+ vcrypto->sess_pool) < 0) {
+ VC_LOG_ERR("Failed to initialize session");
+ sess_param->session_id = -VIRTIO_CRYPTO_ERR;
+ return;
+ }
+
+ /* insert hash to map */
+ if (rte_hash_add_key_data(vcrypto->session_map,
+ &vcrypto->last_session_id, session) < 0) {
+ VC_LOG_ERR("Failed to insert session to hash table");
+
+ if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0)
+ VC_LOG_ERR("Failed to clear session");
+ else {
+ if (rte_cryptodev_sym_session_free(session) < 0)
+ VC_LOG_ERR("Failed to free session");
+ }
+ sess_param->session_id = -VIRTIO_CRYPTO_ERR;
+ return;
+ }
+
+ VC_LOG_INFO("Session %"PRIu64" created for vdev %i.",
+ vcrypto->last_session_id, vcrypto->dev->vid);
+
+ sess_param->session_id = vcrypto->last_session_id;
+ vcrypto->last_session_id++;
+}
+
+static int
+vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id)
+{
+ struct rte_cryptodev_sym_session *session;
+ uint64_t sess_id = session_id;
+ int ret;
+
+ ret = rte_hash_lookup_data(vcrypto->session_map, &sess_id,
+ (void **)&session);
+
+ if (unlikely(ret < 0)) {
+ VC_LOG_ERR("Failed to delete session %"PRIu64".", session_id);
+ return -VIRTIO_CRYPTO_INVSESS;
+ }
+
+ if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) {
+ VC_LOG_DBG("Failed to clear session");
+ return -VIRTIO_CRYPTO_ERR;
+ }
+
+ if (rte_cryptodev_sym_session_free(session) < 0) {
+ VC_LOG_DBG("Failed to free session");
+ return -VIRTIO_CRYPTO_ERR;
+ }
+
+ if (rte_hash_del_key(vcrypto->session_map, &sess_id) < 0) {
+ VC_LOG_DBG("Failed to delete session from hash table.");
+ return -VIRTIO_CRYPTO_ERR;
+ }
+
+ VC_LOG_INFO("Session %"PRIu64" deleted for vdev %i.", sess_id,
+ vcrypto->dev->vid);
+
+ return 0;
+}
+
+static int
+vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_crypto *vcrypto;
+ VhostUserMsg *vmsg = msg;
+ int ret = 0;
+
+ if (dev == NULL || require_reply == NULL) {
+ VC_LOG_ERR("Invalid vid %i", vid);
+ return -EINVAL;
+ }
+
+ vcrypto = dev->extern_data;
+ if (vcrypto == NULL) {
+ VC_LOG_ERR("Cannot find required data, is it initialized?");
+ return -ENOENT;
+ }
+
+ *require_reply = 0;
+
+ if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) {
+ vhost_crypto_create_sess(vcrypto,
+ &vmsg->payload.crypto_session);
+ *require_reply = 1;
+ } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS)
+ ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64);
+ else
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static __rte_always_inline struct vring_desc *
+find_write_desc(struct vring_desc *head, struct vring_desc *desc)
+{
+ if (desc->flags & VRING_DESC_F_WRITE)
+ return desc;
+
+ while (desc->flags & VRING_DESC_F_NEXT) {
+ desc = &head[desc->next];
+ if (desc->flags & VRING_DESC_F_WRITE)
+ return desc;
+ }
+
+ return NULL;
+}
+
+static struct virtio_crypto_inhdr *
+reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc)
+{
+ uint64_t dlen;
+ struct virtio_crypto_inhdr *inhdr;
+
+ while (desc->flags & VRING_DESC_F_NEXT)
+ desc = &vc_req->head[desc->next];
+
+ dlen = desc->len;
+ inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr,
+ &dlen, VHOST_ACCESS_WO);
+ if (unlikely(!inhdr || dlen != desc->len))
+ return NULL;
+
+ return inhdr;
+}
+
+static __rte_always_inline int
+move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
+ uint32_t size)
+{
+ struct vring_desc *desc = *cur_desc;
+ int left = size;
+
+ rte_prefetch0(&head[desc->next]);
+ left -= desc->len;
+
+ while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+ desc = &head[desc->next];
+ rte_prefetch0(&head[desc->next]);
+ left -= desc->len;
+ }
+
+ if (unlikely(left > 0)) {
+ VC_LOG_ERR("Incorrect virtio descriptor");
+ return -1;
+ }
+
+ *cur_desc = &head[desc->next];
+ return 0;
+}
+
+static int
+copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
+ struct vring_desc **cur_desc, uint32_t size)
+{
+ struct vring_desc *desc = *cur_desc;
+ uint64_t remain, addr, dlen, len;
+ uint32_t to_copy;
+ uint8_t *data = dst_data;
+ uint8_t *src;
+ int left = size;
+
+ rte_prefetch0(&vc_req->head[desc->next]);
+ to_copy = RTE_MIN(desc->len, (uint32_t)left);
+ dlen = to_copy;
+ src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !dlen)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy((uint8_t *)data, src, dlen);
+ data += dlen;
+
+ if (unlikely(dlen < to_copy)) {
+ remain = to_copy - dlen;
+ addr = desc->addr + dlen;
+
+ while (remain) {
+ len = remain;
+ src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !len)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy(data, src, len);
+ addr += len;
+ remain -= len;
+ data += len;
+ }
+ }
+
+ left -= to_copy;
+
+ while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+ desc = &vc_req->head[desc->next];
+ rte_prefetch0(&vc_req->head[desc->next]);
+ to_copy = RTE_MIN(desc->len, (uint32_t)left);
+ dlen = desc->len;
+ src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !dlen)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy(data, src, dlen);
+ data += dlen;
+
+ if (unlikely(dlen < to_copy)) {
+ remain = to_copy - dlen;
+ addr = desc->addr + dlen;
+
+ while (remain) {
+ len = remain;
+ src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !len)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy(data, src, len);
+ addr += len;
+ remain -= len;
+ data += len;
+ }
+ }
+
+ left -= to_copy;
+ }
+
+ if (unlikely(left > 0)) {
+ VC_LOG_ERR("Incorrect virtio descriptor");
+ return -1;
+ }
+
+ *cur_desc = &vc_req->head[desc->next];
+
+ return 0;
+}
+
+static __rte_always_inline void *
+get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc,
+ uint32_t size, uint8_t perm)
+{
+ void *data;
+ uint64_t dlen = (*cur_desc)->len;
+
+ data = IOVA_TO_VVA(void *, vc_req, (*cur_desc)->addr, &dlen, perm);
+ if (unlikely(!data || dlen != (*cur_desc)->len)) {
+ VC_LOG_ERR("Failed to map object");
+ return NULL;
+ }
+
+ if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0))
+ return NULL;
+
+ return data;
+}
+
+static int
+write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req)
+{
+ struct rte_mbuf *mbuf = op->sym->m_dst;
+ struct vring_desc *head = vc_req->head;
+ struct vring_desc *desc = vc_req->wb_desc;
+ int left = vc_req->wb_len;
+ uint32_t to_write;
+ uint8_t *src_data = mbuf->buf_addr, *dst;
+ uint64_t dlen;
+
+ rte_prefetch0(&head[desc->next]);
+ to_write = RTE_MIN(desc->len, (uint32_t)left);
+ dlen = desc->len;
+ dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+ VHOST_ACCESS_RW);
+ if (unlikely(!dst || dlen != desc->len)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy(dst, src_data, to_write);
+ left -= to_write;
+ src_data += to_write;
+
+ while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+ desc = &head[desc->next];
+ rte_prefetch0(&head[desc->next]);
+ to_write = RTE_MIN(desc->len, (uint32_t)left);
+ dlen = desc->len;
+ dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+ VHOST_ACCESS_RW);
+ if (unlikely(!dst || dlen != desc->len)) {
+ VC_LOG_ERR("Failed to map descriptor");
+ return -1;
+ }
+
+ rte_memcpy(dst, src_data, to_write);
+ left -= to_write;
+ src_data += to_write;
+ }
+
+ if (unlikely(left < 0)) {
+ VC_LOG_ERR("Incorrect virtio descriptor");
+ return -1;
+ }
+
+ return 0;
+}
+
+static uint8_t
+prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
+ struct vhost_crypto_data_req *vc_req,
+ struct virtio_crypto_cipher_data_req *cipher,
+ struct vring_desc *cur_desc)
+{
+ struct vring_desc *desc = cur_desc;
+ struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst;
+ uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET);
+ uint8_t ret = 0;
+
+ /* prepare */
+ /* iv */
+ if (unlikely(copy_data(iv_data, vc_req, &desc,
+ cipher->para.iv_len) < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ m_src->data_len = cipher->para.src_data_len;
+
+ switch (vcrypto->option) {
+ case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+ m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
+ cipher->para.src_data_len);
+ m_src->buf_addr = get_data_ptr(vc_req, &desc,
+ cipher->para.src_data_len, VHOST_ACCESS_RO);
+ if (unlikely(m_src->buf_iova == 0 ||
+ m_src->buf_addr == NULL)) {
+ VC_LOG_ERR("zero_copy may fail due to cross page data");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ break;
+ case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+ if (unlikely(cipher->para.src_data_len >
+ RTE_MBUF_DEFAULT_BUF_SIZE)) {
+ VC_LOG_ERR("Not enough space to do data copy");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
+ vc_req, &desc, cipher->para.src_data_len)
+ < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+ break;
+ default:
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ /* dst */
+ desc = find_write_desc(vc_req->head, desc);
+ if (unlikely(!desc)) {
+ VC_LOG_ERR("Cannot find write location");
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ switch (vcrypto->option) {
+ case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+ m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
+ desc->addr, cipher->para.dst_data_len);
+ m_dst->buf_addr = get_data_ptr(vc_req, &desc,
+ cipher->para.dst_data_len, VHOST_ACCESS_RW);
+ if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+ VC_LOG_ERR("zero_copy may fail due to cross page data");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+
+ m_dst->data_len = cipher->para.dst_data_len;
+ break;
+ case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+ vc_req->wb_desc = desc;
+ vc_req->wb_len = cipher->para.dst_data_len;
+ if (unlikely(move_desc(vc_req->head, &desc,
+ vc_req->wb_len) < 0)) {
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ break;
+ default:
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ /* src data */
+ op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+
+ op->sym->cipher.data.offset = 0;
+ op->sym->cipher.data.length = cipher->para.src_data_len;
+
+ vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO);
+ if (unlikely(vc_req->inhdr == NULL)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ vc_req->inhdr->status = VIRTIO_CRYPTO_OK;
+ vc_req->len = cipher->para.dst_data_len + INHDR_LEN;
+
+ return 0;
+
+error_exit:
+ vc_req->len = INHDR_LEN;
+ return ret;
+}
+
+static uint8_t
+prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
+ struct vhost_crypto_data_req *vc_req,
+ struct virtio_crypto_alg_chain_data_req *chain,
+ struct vring_desc *cur_desc)
+{
+ struct vring_desc *desc = cur_desc;
+ struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst;
+ uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET);
+ uint32_t digest_offset;
+ void *digest_addr;
+ uint8_t ret = 0;
+
+ /* prepare */
+ /* iv */
+ if (unlikely(copy_data(iv_data, vc_req, &desc,
+ chain->para.iv_len) < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ m_src->data_len = chain->para.src_data_len;
+ m_dst->data_len = chain->para.dst_data_len;
+
+ switch (vcrypto->option) {
+ case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+ m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
+ chain->para.src_data_len);
+ m_src->buf_addr = get_data_ptr(vc_req, &desc,
+ chain->para.src_data_len, VHOST_ACCESS_RO);
+ if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+ VC_LOG_ERR("zero_copy may fail due to cross page data");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ break;
+ case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+ if (unlikely(chain->para.src_data_len >
+ RTE_MBUF_DEFAULT_BUF_SIZE)) {
+ VC_LOG_ERR("Not enough space to do data copy");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
+					vc_req, &desc, chain->para.src_data_len) < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+ break;
+ default:
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ /* dst */
+ desc = find_write_desc(vc_req->head, desc);
+ if (unlikely(!desc)) {
+ VC_LOG_ERR("Cannot find write location");
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ switch (vcrypto->option) {
+ case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+ m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
+ desc->addr, chain->para.dst_data_len);
+ m_dst->buf_addr = get_data_ptr(vc_req, &desc,
+ chain->para.dst_data_len, VHOST_ACCESS_RW);
+ if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+ VC_LOG_ERR("zero_copy may fail due to cross page data");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+
+ op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev,
+ desc->addr, chain->para.hash_result_len);
+ op->sym->auth.digest.data = get_data_ptr(vc_req, &desc,
+ chain->para.hash_result_len, VHOST_ACCESS_RW);
+ if (unlikely(op->sym->auth.digest.phys_addr == 0)) {
+ VC_LOG_ERR("zero_copy may fail due to cross page data");
+ ret = VIRTIO_CRYPTO_ERR;
+ goto error_exit;
+ }
+ break;
+ case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+ digest_offset = m_dst->data_len;
+ digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *,
+ digest_offset);
+
+ vc_req->wb_desc = desc;
+ vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len;
+
+ if (unlikely(move_desc(vc_req->head, &desc,
+ chain->para.dst_data_len) < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ if (unlikely(copy_data(digest_addr, vc_req, &desc,
+				chain->para.hash_result_len) < 0)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ op->sym->auth.digest.data = digest_addr;
+ op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst,
+ digest_offset);
+ break;
+ default:
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ /* record inhdr */
+ vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO);
+ if (unlikely(vc_req->inhdr == NULL)) {
+ ret = VIRTIO_CRYPTO_BADMSG;
+ goto error_exit;
+ }
+
+ vc_req->inhdr->status = VIRTIO_CRYPTO_OK;
+
+ op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+
+ op->sym->cipher.data.offset = chain->para.cipher_start_src_offset;
+ op->sym->cipher.data.length = chain->para.src_data_len -
+ chain->para.cipher_start_src_offset;
+
+ op->sym->auth.data.offset = chain->para.hash_start_src_offset;
+ op->sym->auth.data.length = chain->para.len_to_hash;
+
+ vc_req->len = chain->para.dst_data_len + chain->para.hash_result_len +
+ INHDR_LEN;
+ return 0;
+
+error_exit:
+ vc_req->len = INHDR_LEN;
+ return ret;
+}
+
+/**
+ * Process one request from a descriptor chain.
+ */
+static __rte_always_inline int
+vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
+ struct vhost_virtqueue *vq, struct rte_crypto_op *op,
+ struct vring_desc *head, uint16_t desc_idx)
+{
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src);
+ struct rte_cryptodev_sym_session *session;
+ struct virtio_crypto_op_data_req *req, tmp_req;
+ struct virtio_crypto_inhdr *inhdr;
+ struct vring_desc *desc = NULL;
+ uint64_t session_id;
+ uint64_t dlen;
+ int err = 0;
+
+ vc_req->desc_idx = desc_idx;
+ vc_req->dev = vcrypto->dev;
+ vc_req->vq = vq;
+
+ if (likely(head->flags & VRING_DESC_F_INDIRECT)) {
+ dlen = head->len;
+ desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr,
+ &dlen, VHOST_ACCESS_RO);
+ if (unlikely(!desc || dlen != head->len))
+ return -1;
+ desc_idx = 0;
+ head = desc;
+ } else {
+ desc = head;
+ }
+
+ vc_req->head = head;
+ vc_req->zero_copy = vcrypto->option;
+
+ req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO);
+ if (unlikely(req == NULL)) {
+ switch (vcrypto->option) {
+ case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+ err = VIRTIO_CRYPTO_BADMSG;
+ VC_LOG_ERR("Invalid descriptor");
+ goto error_exit;
+ case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+ req = &tmp_req;
+ if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req))
+ < 0)) {
+ err = VIRTIO_CRYPTO_BADMSG;
+ VC_LOG_ERR("Invalid descriptor");
+ goto error_exit;
+ }
+ break;
+ default:
+ err = VIRTIO_CRYPTO_ERR;
+ VC_LOG_ERR("Invalid option");
+ goto error_exit;
+ }
+ }
+
+ switch (req->header.opcode) {
+ case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
+ case VIRTIO_CRYPTO_CIPHER_DECRYPT:
+ session_id = req->header.session_id;
+
+ /* one branch to avoid unnecessary table lookup */
+ if (vcrypto->cache_session_id != session_id) {
+ err = rte_hash_lookup_data(vcrypto->session_map,
+ &session_id, (void **)&session);
+ if (unlikely(err < 0)) {
+ err = VIRTIO_CRYPTO_ERR;
+ VC_LOG_ERR("Failed to find session %"PRIu64,
+ session_id);
+ goto error_exit;
+ }
+
+ vcrypto->cache_session = session;
+ vcrypto->cache_session_id = session_id;
+ }
+
+ session = vcrypto->cache_session;
+
+ err = rte_crypto_op_attach_sym_session(op, session);
+ if (unlikely(err < 0)) {
+ err = VIRTIO_CRYPTO_ERR;
+ VC_LOG_ERR("Failed to attach session to op");
+ goto error_exit;
+ }
+
+ switch (req->u.sym_req.op_type) {
+ case VIRTIO_CRYPTO_SYM_OP_NONE:
+ err = VIRTIO_CRYPTO_NOTSUPP;
+ break;
+ case VIRTIO_CRYPTO_SYM_OP_CIPHER:
+ err = prepare_sym_cipher_op(vcrypto, op, vc_req,
+ &req->u.sym_req.u.cipher, desc);
+ break;
+ case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
+ err = prepare_sym_chain_op(vcrypto, op, vc_req,
+ &req->u.sym_req.u.chain, desc);
+ break;
+ }
+ if (unlikely(err != 0)) {
+ VC_LOG_ERR("Failed to process sym request");
+ goto error_exit;
+ }
+ break;
+ default:
+ VC_LOG_ERR("Unsupported symmetric crypto request type %u",
+ req->header.opcode);
+ goto error_exit;
+ }
+
+ return 0;
+
+error_exit:
+
+ inhdr = reach_inhdr(vc_req, desc);
+ if (likely(inhdr != NULL))
+ inhdr->status = (uint8_t)err;
+
+ return -1;
+}
+
+static __rte_always_inline struct vhost_virtqueue *
+vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
+ struct vhost_virtqueue *old_vq)
+{
+ struct rte_mbuf *m_src = op->sym->m_src;
+ struct rte_mbuf *m_dst = op->sym->m_dst;
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src);
+ uint16_t desc_idx;
+ int ret = 0;
+
+ if (unlikely(!vc_req)) {
+ VC_LOG_ERR("Failed to retrieve vc_req");
+ return NULL;
+ }
+
+ if (old_vq && (vc_req->vq != old_vq))
+ return vc_req->vq;
+
+ desc_idx = vc_req->desc_idx;
+
+ if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+ vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
+ else {
+ if (vc_req->zero_copy == 0) {
+ ret = write_back_data(op, vc_req);
+ if (unlikely(ret != 0))
+ vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
+ }
+ }
+
+ vc_req->vq->used->ring[desc_idx].id = desc_idx;
+ vc_req->vq->used->ring[desc_idx].len = vc_req->len;
+
+ rte_mempool_put(m_dst->pool, (void *)m_dst);
+ rte_mempool_put(m_src->pool, (void *)m_src);
+
+ return vc_req->vq;
+}
+
+static __rte_always_inline uint16_t
+vhost_crypto_complete_one_vm_requests(struct rte_crypto_op **ops,
+ uint16_t nb_ops, int *callfd)
+{
+ uint16_t processed = 1;
+ struct vhost_virtqueue *vq, *tmp_vq;
+
+ if (unlikely(nb_ops == 0))
+ return 0;
+
+ vq = vhost_crypto_finalize_one_request(ops[0], NULL);
+ if (unlikely(vq == NULL))
+ return 0;
+ tmp_vq = vq;
+
+ while ((processed < nb_ops)) {
+ tmp_vq = vhost_crypto_finalize_one_request(ops[processed],
+ tmp_vq);
+
+ if (unlikely(vq != tmp_vq))
+ break;
+
+ processed++;
+ }
+
+ *callfd = vq->callfd;
+
+ *(volatile uint16_t *)&vq->used->idx += processed;
+
+ return processed;
+}
+
+int __rte_experimental
+rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
+ struct rte_mempool *sess_pool, int socket_id)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct rte_hash_parameters params = {0};
+ struct vhost_crypto *vcrypto;
+ char name[128];
+ int ret;
+
+ if (!dev) {
+ VC_LOG_ERR("Invalid vid %i", vid);
+ return -EINVAL;
+ }
+
+ ret = rte_vhost_driver_set_features(dev->ifname,
+ VIRTIO_CRYPTO_FEATURES);
+ if (ret < 0) {
+ VC_LOG_ERR("Error setting features");
+ return -1;
+ }
+
+ vcrypto = rte_zmalloc_socket(NULL, sizeof(*vcrypto),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (!vcrypto) {
+ VC_LOG_ERR("Insufficient memory");
+ return -ENOMEM;
+ }
+
+ vcrypto->sess_pool = sess_pool;
+ vcrypto->cid = cryptodev_id;
+ vcrypto->cache_session_id = UINT64_MAX;
+ vcrypto->last_session_id = 1;
+ vcrypto->dev = dev;
+ vcrypto->option = RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE;
+
+ snprintf(name, 127, "HASH_VHOST_CRYPT_%u", (uint32_t)vid);
+ params.name = name;
+ params.entries = VHOST_CRYPTO_SESSION_MAP_ENTRIES;
+ params.hash_func = rte_jhash;
+ params.key_len = sizeof(uint64_t);
+ params.socket_id = socket_id;
+ vcrypto->session_map = rte_hash_create(&params);
+ if (!vcrypto->session_map) {
+ VC_LOG_ERR("Failed to creath session map");
+ ret = -ENOMEM;
+ goto error_exit;
+ }
+
+ snprintf(name, 127, "MBUF_POOL_VM_%u", (uint32_t)vid);
+ vcrypto->mbuf_pool = rte_pktmbuf_pool_create(name,
+ VHOST_CRYPTO_MBUF_POOL_SIZE, 512,
+ sizeof(struct vhost_crypto_data_req),
+ RTE_MBUF_DEFAULT_DATAROOM * 2 + RTE_PKTMBUF_HEADROOM,
+ rte_socket_id());
+ if (!vcrypto->mbuf_pool) {
+ VC_LOG_ERR("Failed to creath mbuf pool");
+ ret = -ENOMEM;
+ goto error_exit;
+ }
+
+ dev->extern_data = vcrypto;
+ dev->extern_ops.pre_msg_handle = NULL;
+ dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler;
+
+ return 0;
+
+error_exit:
+ if (vcrypto->session_map)
+ rte_hash_free(vcrypto->session_map);
+ if (vcrypto->mbuf_pool)
+ rte_mempool_free(vcrypto->mbuf_pool);
+
+ rte_free(vcrypto);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_vhost_crypto_free(int vid)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_crypto *vcrypto;
+
+ if (unlikely(dev == NULL)) {
+ VC_LOG_ERR("Invalid vid %i", vid);
+ return -EINVAL;
+ }
+
+ vcrypto = dev->extern_data;
+ if (unlikely(vcrypto == NULL)) {
+ VC_LOG_ERR("Cannot find required data, is it initialized?");
+ return -ENOENT;
+ }
+
+ rte_hash_free(vcrypto->session_map);
+ rte_mempool_free(vcrypto->mbuf_pool);
+ rte_free(vcrypto);
+
+ dev->extern_data = NULL;
+ dev->extern_ops.pre_msg_handle = NULL;
+ dev->extern_ops.post_msg_handle = NULL;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_crypto *vcrypto;
+
+ if (unlikely(dev == NULL)) {
+ VC_LOG_ERR("Invalid vid %i", vid);
+ return -EINVAL;
+ }
+
+ if (unlikely((uint32_t)option >=
+ RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS)) {
+ VC_LOG_ERR("Invalid option %i", option);
+ return -EINVAL;
+ }
+
+ vcrypto = (struct vhost_crypto *)dev->extern_data;
+ if (unlikely(vcrypto == NULL)) {
+ VC_LOG_ERR("Cannot find required data, is it initialized?");
+ return -ENOENT;
+ }
+
+ if (vcrypto->option == (uint8_t)option)
+ return 0;
+
+ if (!(rte_mempool_full(vcrypto->mbuf_pool))) {
+ VC_LOG_ERR("Cannot update zero copy as mempool is not full");
+ return -EINVAL;
+ }
+
+ vcrypto->option = (uint8_t)option;
+
+ return 0;
+}
+
+uint16_t __rte_experimental
+rte_vhost_crypto_fetch_requests(int vid, uint32_t qid,
+ struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+ struct rte_mbuf *mbufs[VHOST_CRYPTO_MAX_BURST_SIZE * 2];
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_crypto *vcrypto;
+ struct vhost_virtqueue *vq;
+ uint16_t avail_idx;
+ uint16_t start_idx;
+ uint16_t required;
+ uint16_t count;
+ uint16_t i;
+
+ if (unlikely(dev == NULL)) {
+ VC_LOG_ERR("Invalid vid %i", vid);
+ return -EINVAL;
+ }
+
+ if (unlikely(qid >= VHOST_MAX_QUEUE_PAIRS)) {
+ VC_LOG_ERR("Invalid qid %u", qid);
+ return -EINVAL;
+ }
+
+ vcrypto = (struct vhost_crypto *)dev->extern_data;
+ if (unlikely(vcrypto == NULL)) {
+ VC_LOG_ERR("Cannot find required data, is it initialized?");
+ return -ENOENT;
+ }
+
+ vq = dev->virtqueue[qid];
+
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+ start_idx = vq->last_used_idx;
+ count = avail_idx - start_idx;
+ count = RTE_MIN(count, VHOST_CRYPTO_MAX_BURST_SIZE);
+ count = RTE_MIN(count, nb_ops);
+
+ if (unlikely(count == 0))
+ return 0;
+
+	/* For zero copy, two empty mbufs are needed per request, one for src
+	 * and one for dst; otherwise a single mbuf could serve as both src and dst.
+ */
+ required = count * 2;
+ if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs,
+ required) < 0)) {
+ VC_LOG_ERR("Insufficient memory");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ uint16_t used_idx = (start_idx + i) & (vq->size - 1);
+ uint16_t desc_idx = vq->avail->ring[used_idx];
+ struct vring_desc *head = &vq->desc[desc_idx];
+ struct rte_crypto_op *op = ops[i];
+
+ op->sym->m_src = mbufs[i * 2];
+ op->sym->m_dst = mbufs[i * 2 + 1];
+ op->sym->m_src->data_off = 0;
+ op->sym->m_dst->data_off = 0;
+
+ if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head,
+				desc_idx) < 0))
+ break;
+ }
+
+ vq->last_used_idx += i;
+
+ return i;
+}
+
+uint16_t __rte_experimental
+rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops,
+ uint16_t nb_ops, int *callfds, uint16_t *nb_callfds)
+{
+ struct rte_crypto_op **tmp_ops = ops;
+ uint16_t count = 0, left = nb_ops;
+ int callfd;
+ uint16_t idx = 0;
+
+ while (left) {
+ count = vhost_crypto_complete_one_vm_requests(tmp_ops, left,
+ &callfd);
+ if (unlikely(count == 0))
+ break;
+
+ tmp_ops = &tmp_ops[count];
+ left -= count;
+
+ callfds[idx++] = callfd;
+
+ if (unlikely(idx >= VIRTIO_CRYPTO_MAX_NUM_BURST_VQS)) {
+ VC_LOG_ERR("Too many vqs");
+ break;
+ }
+ }
+
+ *nb_callfds = idx;
+
+ return nb_ops - left;
+}
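
Taken together, the fetch/finalize API above is meant to be driven by an application loop: fetch requests from a guest virtqueue, enqueue them on a cryptodev queue pair, dequeue the completions, finalize them back to the used rings, and kick the returned callfds. A rough sketch of one polling step follows; BURST, MAX_VQS and all ids are assumptions of this sketch, the ops array is assumed pre-allocated from a crypto op mempool, and error handling plus retry of partially enqueued bursts are omitted.

	#include <stdint.h>
	#include <sys/eventfd.h>
	#include <rte_cryptodev.h>
	#include <rte_vhost_crypto.h>

	#define BURST 32   /* assumed burst size */
	#define MAX_VQS 64 /* assumed to cover the per-burst virtqueue limit */

	/* Rough polling step; vid, vq_id, cdev_id, qp_id and the ops array
	 * are application-defined.
	 */
	static void
	vhost_crypto_poll_once(int vid, uint32_t vq_id, uint8_t cdev_id,
			uint16_t qp_id, struct rte_crypto_op **ops)
	{
		int callfds[MAX_VQS];
		uint16_t nb_callfds, nb, done, i;

		nb = rte_vhost_crypto_fetch_requests(vid, vq_id, ops, BURST);
		if (nb == 0)
			return;

		/* Hand the prepared ops to the cryptodev, then collect completions. */
		rte_cryptodev_enqueue_burst(cdev_id, qp_id, ops, nb);
		done = rte_cryptodev_dequeue_burst(cdev_id, qp_id, ops, BURST);

		/* Write results back to the guest rings and kick the vrings. */
		rte_vhost_crypto_finalize_requests(ops, done, callfds, &nb_callfds);
		for (i = 0; i < nb_callfds; i++)
			eventfd_write(callfds[i], (eventfd_t)1);
	}
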
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 90ed2112..a2d4c9ff 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1,5 +1,22 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2016 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+/* Security model
+ * --------------
+ * The vhost-user protocol connection is an external interface, so it must be
+ * robust against invalid inputs.
+ *
+ * This is important because the vhost-user master is only one step removed
+ * from the guest. Malicious guests that have escaped will then launch further
+ * attacks from the vhost-user master.
+ *
+ * Even in deployments where guests are trusted, a bug in the vhost-user master
+ * can still cause invalid messages to be sent. Such messages must not
+ * compromise the stability of the DPDK application by causing crashes, memory
+ * corruption, or other problematic behavior.
+ *
+ * Do not assume received VhostUserMsg fields contain sensible values!
*/
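
As one concrete instance of this policy, every count, size, and offset carried in a message should be range-checked before use, in the same spirit as the nregions and mmap_offset checks added later in this file. A minimal sketch of that pattern, using a made-up message layout rather than the real VhostUserMsg:

	#include <stdbool.h>
	#include <stdint.h>

	#define MAX_REGIONS 8 /* assumed limit, mirrors VHOST_MEMORY_MAX_NREGIONS */

	/* Made-up message: not the real VhostUserMsg layout. */
	struct toy_msg {
		uint32_t nregions;
		uint64_t size;
		uint64_t offset;
	};

	/* Reject anything that could overflow or index out of bounds. */
	static bool
	toy_msg_is_sane(const struct toy_msg *m)
	{
		if (m->nregions > MAX_REGIONS)
			return false;
		if (m->offset > m->size)           /* offset must stay inside the area */
			return false;
		if (m->size + m->offset < m->size) /* addition must not wrap */
			return false;
		return true;
	}
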
#include <stdint.h>
@@ -50,6 +67,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU",
[VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD",
[VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
+ [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
+ [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
};
static uint64_t
@@ -116,10 +135,7 @@ vhost_user_set_owner(void)
static int
vhost_user_reset_owner(struct virtio_net *dev)
{
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 0);
reset_device(dev);
@@ -139,12 +155,26 @@ vhost_user_get_features(struct virtio_net *dev)
}
/*
+ * The number of queues we support is requested.
+ */
+static uint32_t
+vhost_user_get_queue_num(struct virtio_net *dev)
+{
+ uint32_t queue_num = 0;
+
+ rte_vhost_driver_get_queue_num(dev->ifname, &queue_num);
+ return queue_num;
+}
+
+/*
* We receive the negotiated features supported by us and the virtio device.
*/
static int
vhost_user_set_features(struct virtio_net *dev, uint64_t features)
{
uint64_t vhost_features = 0;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
rte_vhost_driver_get_features(dev->ifname, &vhost_features);
if (features & ~vhost_features) {
@@ -181,7 +211,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
} else {
dev->vhost_hlen = sizeof(struct virtio_net_hdr);
}
- LOG_DEBUG(VHOST_CONFIG,
+ VHOST_LOG_DEBUG(VHOST_CONFIG,
"(%d) mergeable RX buffers %s, virtio 1 %s\n",
dev->vid,
(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
@@ -203,10 +233,15 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
dev->virtqueue[dev->nr_vring] = NULL;
cleanup_vq(vq, 1);
- free_vq(vq);
+ free_vq(dev, vq);
}
}
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->set_features)
+ vdpa_dev->ops->set_features(dev->vid);
+
return 0;
}
@@ -221,6 +256,17 @@ vhost_user_set_vring_num(struct virtio_net *dev,
vq->size = msg->payload.state.num;
+ /* VIRTIO 1.0, 2.4 Virtqueues says:
+ *
+ * Queue Size value is always a power of 2. The maximum Queue Size
+ * value is 32768.
+ */
+ if ((vq->size & (vq->size - 1)) || vq->size > 32768) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "invalid virtqueue size %u\n", vq->size);
+ return -1;
+ }
+
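
The (size & (size - 1)) test above is the usual power-of-two check: a power of two has exactly one bit set, so clearing the lowest set bit leaves zero. A tiny worked example, not library code (the n != 0 guard is an extra added here for completeness):

	#include <stdint.h>
	#include <stdio.h>

	static int
	is_pow2(uint32_t n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	int
	main(void)
	{
		printf("%d %d %d\n", is_pow2(256), is_pow2(384), is_pow2(32768));
		/* prints: 1 0 1 */
		return 0;
	}
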
if (dev->dequeue_zero_copy) {
vq->nr_zmbuf = 0;
vq->last_zmbuf_idx = 0;
@@ -236,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev,
TAILQ_INIT(&vq->zmbuf_list);
}
- vq->shadow_used_ring = rte_malloc(NULL,
+ if (vq_is_packed(dev)) {
+ vq->shadow_used_packed = rte_malloc(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->shadow_used_packed) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
+
+ } else {
+ vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
- if (!vq->shadow_used_ring) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to allocate memory for shadow used ring.\n");
- return -1;
+ if (!vq->shadow_used_split) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
}
vq->batch_copy_elems = rte_malloc(NULL,
@@ -269,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index)
struct virtio_net *old_dev;
struct vhost_virtqueue *old_vq, *vq;
struct zcopy_mbuf *new_zmbuf;
- struct vring_used_elem *new_shadow_used_ring;
+ struct vring_used_elem *new_shadow_used_split;
+ struct vring_used_elem_packed *new_shadow_used_packed;
struct batch_copy_elem *new_batch_copy_elems;
int ret;
@@ -304,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index)
vq->zmbufs = new_zmbuf;
}
- new_shadow_used_ring = rte_malloc_socket(NULL,
- vq->size * sizeof(struct vring_used_elem),
- RTE_CACHE_LINE_SIZE,
- newnode);
- if (new_shadow_used_ring) {
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = new_shadow_used_ring;
+ if (vq_is_packed(dev)) {
+ new_shadow_used_packed = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_packed) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = new_shadow_used_packed;
+ }
+ } else {
+ new_shadow_used_split = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_split) {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = new_shadow_used_split;
+ }
}
new_batch_copy_elems = rte_malloc_socket(NULL,
@@ -366,21 +439,26 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
/* Converts QEMU virtual address to Vhost virtual address. */
static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qva)
+qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
{
- struct rte_vhost_mem_region *reg;
+ struct rte_vhost_mem_region *r;
uint32_t i;
/* Find the region where the address lives. */
for (i = 0; i < dev->mem->nregions; i++) {
- reg = &dev->mem->regions[i];
+ r = &dev->mem->regions[i];
+
+ if (qva >= r->guest_user_addr &&
+ qva < r->guest_user_addr + r->size) {
- if (qva >= reg->guest_user_addr &&
- qva < reg->guest_user_addr + reg->size) {
- return qva - reg->guest_user_addr +
- reg->host_user_addr;
+ if (unlikely(*len > r->guest_user_addr + r->size - qva))
+ *len = r->guest_user_addr + r->size - qva;
+
+ return qva - r->guest_user_addr +
+ r->host_user_addr;
}
}
+ *len = 0;
return 0;
}
@@ -393,20 +471,20 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
*/
static uint64_t
ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t ra, uint64_t size)
+ uint64_t ra, uint64_t *size)
{
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
uint64_t vva;
vva = vhost_user_iotlb_cache_find(vq, ra,
- &size, VHOST_ACCESS_RW);
+ size, VHOST_ACCESS_RW);
if (!vva)
vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW);
return vva;
}
- return qva_to_vva(dev, ra);
+ return qva_to_vva(dev, ra, size);
}
static struct virtio_net *
@@ -414,16 +492,63 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
{
struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
struct vhost_vring_addr *addr = &vq->ring_addrs;
+ uint64_t len;
+
+ if (vq_is_packed(dev)) {
+ len = sizeof(struct vring_packed_desc) * vq->size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len);
+ vq->log_guest_addr = 0;
+ if (vq->desc_packed == NULL ||
+ len != sizeof(struct vring_packed_desc) *
+ vq->size) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to map desc_packed ring.\n",
+ dev->vid);
+ return dev;
+ }
+
+ dev = numa_realloc(dev, vq_index);
+ vq = dev->virtqueue[vq_index];
+ addr = &vq->ring_addrs;
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->driver_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->avail_user_addr, &len);
+ if (vq->driver_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find driver area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->device_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->used_user_addr, &len);
+ if (vq->device_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find device area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ return dev;
+ }
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
+ len = sizeof(struct vring_desc) * vq->size;
vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
- vq, addr->desc_user_addr, sizeof(struct vring_desc));
- if (vq->desc == 0) {
+ vq, addr->desc_user_addr, &len);
+ if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) {
RTE_LOG(DEBUG, VHOST_CONFIG,
- "(%d) failed to find desc ring address.\n",
+ "(%d) failed to map desc ring.\n",
dev->vid);
return dev;
}
@@ -432,20 +557,26 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
vq = dev->virtqueue[vq_index];
addr = &vq->ring_addrs;
+ len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
- vq, addr->avail_user_addr, sizeof(struct vring_avail));
- if (vq->avail == 0) {
+ vq, addr->avail_user_addr, &len);
+ if (vq->avail == 0 ||
+ len != sizeof(struct vring_avail) +
+ sizeof(uint16_t) * vq->size) {
RTE_LOG(DEBUG, VHOST_CONFIG,
- "(%d) failed to find avail ring address.\n",
+ "(%d) failed to map avail ring.\n",
dev->vid);
return dev;
}
+ len = sizeof(struct vring_used) +
+ sizeof(struct vring_used_elem) * vq->size;
vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
- vq, addr->used_user_addr, sizeof(struct vring_used));
- if (vq->used == 0) {
+ vq, addr->used_user_addr, &len);
+ if (vq->used == 0 || len != sizeof(struct vring_used) +
+ sizeof(struct vring_used_elem) * vq->size) {
RTE_LOG(DEBUG, VHOST_CONFIG,
- "(%d) failed to find used ring address.\n",
+ "(%d) failed to map used ring.\n",
dev->vid);
return dev;
}
@@ -461,13 +592,13 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
vq->log_guest_addr = addr->log_guest_addr;
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
+ VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
dev->vid, vq->desc);
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
+ VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
dev->vid, vq->avail);
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
+ VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
dev->vid, vq->used);
- LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
+ VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
dev->vid, vq->log_guest_addr);
return dev;
@@ -500,7 +631,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
if (vq->enabled && (dev->features &
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
- dev = translate_ring_addresses(dev, msg->payload.state.index);
+ dev = translate_ring_addresses(dev, msg->payload.addr.index);
if (!dev)
return -1;
@@ -525,7 +656,7 @@ vhost_user_set_vring_base(struct virtio_net *dev,
return 0;
}
-static void
+static int
add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
uint64_t host_phys_addr, uint64_t size)
{
@@ -535,6 +666,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
dev->max_guest_pages *= 2;
dev->guest_pages = realloc(dev->guest_pages,
dev->max_guest_pages * sizeof(*page));
+ if (!dev->guest_pages) {
+ RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n");
+ return -1;
+ }
}
if (dev->nr_guest_pages > 0) {
@@ -543,7 +678,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
if (host_phys_addr == last_page->host_phys_addr +
last_page->size) {
last_page->size += size;
- return;
+ return 0;
}
}
@@ -551,9 +686,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
page->guest_phys_addr = guest_phys_addr;
page->host_phys_addr = host_phys_addr;
page->size = size;
+
+ return 0;
}
-static void
+static int
add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
uint64_t page_size)
{
@@ -567,7 +704,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
size = page_size - (guest_phys_addr & (page_size - 1));
size = RTE_MIN(size, reg_size);
- add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+ if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+ return -1;
+
host_user_addr += size;
guest_phys_addr += size;
reg_size -= size;
@@ -576,12 +715,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
size = RTE_MIN(reg_size, page_size);
host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
host_user_addr);
- add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+ if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+ size) < 0)
+ return -1;
host_user_addr += size;
guest_phys_addr += size;
reg_size -= size;
}
+
+ return 0;
}
#ifdef RTE_LIBRTE_VHOST_DEBUG
@@ -635,8 +778,9 @@ vhost_memory_changed(struct VhostUserMemory *new,
}
static int
-vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
{
+ struct virtio_net *dev = *pdev;
struct VhostUserMemory memory = pmsg->payload.memory;
struct rte_vhost_mem_region *reg;
void *mmap_addr;
@@ -644,8 +788,15 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
uint64_t mmap_offset;
uint64_t alignment;
uint32_t i;
+ int populate;
int fd;
+ if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "too many memory regions (%u)\n", memory.nregions);
+ return -1;
+ }
+
if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
RTE_LOG(INFO, VHOST_CONFIG,
"(%d) memory regions not changed\n", dev->vid);
@@ -662,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
dev->mem = NULL;
}
+ /* Flush IOTLB cache as previous HVAs are now invalid */
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ for (i = 0; i < dev->nr_vring; i++)
+ vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+
dev->nr_guest_pages = 0;
if (!dev->guest_pages) {
dev->max_guest_pages = 8;
@@ -696,7 +852,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
reg->fd = fd;
mmap_offset = memory.regions[i].mmap_offset;
- mmap_size = reg->size + mmap_offset;
+
+ /* Check for memory_size + mmap_offset overflow */
+ if (mmap_offset >= -reg->size) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "mmap_offset (%#"PRIx64") and memory_size "
+ "(%#"PRIx64") overflow\n",
+ mmap_offset, reg->size);
+ goto err_mmap;
+ }
+
+ mmap_size = reg->size + mmap_offset;
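
The overflow test above relies on unsigned wrap-around: for a non-zero size, -size equals 2^64 - size, so mmap_offset >= -size is exactly the condition that mmap_offset + size would wrap past 2^64. A small sketch of the same trick in isolation, with illustrative helper names:

	#include <stdbool.h>
	#include <stdint.h>

	/* True when off + size would wrap a 64-bit unsigned value (size == 0
	 * is also rejected, since -0 == 0 makes the comparison always true).
	 */
	static bool
	add_would_overflow(uint64_t off, uint64_t size)
	{
		return off >= -size;
	}

	int
	main(void)
	{
		/* 0xFFFFFFFFFFFFF000 + 0x2000 wraps; 0x1000 + 0x2000 does not. */
		return add_would_overflow(0xFFFFFFFFFFFFF000ULL, 0x2000ULL) &&
			!add_would_overflow(0x1000ULL, 0x2000ULL) ? 0 : 1;
	}
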
/* mmap() without flag of MAP_ANONYMOUS, should be called
* with length argument aligned with hugepagesz at older
@@ -714,8 +880,9 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
}
mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+ populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, fd, 0);
+ MAP_SHARED | populate, fd, 0);
if (mmap_addr == MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG,
@@ -729,7 +896,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
mmap_offset;
if (dev->dequeue_zero_copy)
- add_guest_pages(dev, reg, alignment);
+ if (add_guest_pages(dev, reg, alignment) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "adding guest pages to region %u failed.\n",
+ i);
+ goto err_mmap;
+ }
RTE_LOG(INFO, VHOST_CONFIG,
"guest memory region %u, size: 0x%" PRIx64 "\n"
@@ -750,6 +922,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
mmap_offset);
}
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (vq->desc || vq->avail || vq->used) {
+ /*
+ * If the memory table got updated, the ring addresses
+ * need to be translated again as virtual addresses have
+ * changed.
+ */
+ vring_invalidate(dev, vq);
+
+ dev = translate_ring_addresses(dev, i);
+ if (!dev)
+ return -1;
+
+ *pdev = dev;
+ }
+ }
+
dump_guest_pages(dev);
return 0;
@@ -761,10 +952,20 @@ err_mmap:
return -1;
}
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
+static bool
+vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- return vq && vq->desc && vq->avail && vq->used &&
+ bool rings_ok;
+
+ if (!vq)
+ return false;
+
+ if (vq_is_packed(dev))
+ rings_ok = !!vq->desc_packed;
+ else
+ rings_ok = vq->desc && vq->avail && vq->used;
+
+ return rings_ok &&
vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
}
@@ -781,7 +982,7 @@ virtio_is_ready(struct virtio_net *dev)
for (i = 0; i < dev->nr_vring; i++) {
vq = dev->virtqueue[i];
- if (!vq_is_ready(vq))
+ if (!vq_is_ready(dev, vq))
return 0;
}
@@ -874,15 +1075,13 @@ vhost_user_get_vring_base(struct virtio_net *dev,
struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
/* We have to stop the queue (virtio) if it is running. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
dev->flags &= ~VIRTIO_DEV_READY;
+ dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
- /* Here we are safe to get the last used index */
- msg->payload.state.num = vq->last_used_idx;
+ /* Here we are safe to get the last avail index */
+ msg->payload.state.num = vq->last_avail_idx;
RTE_LOG(INFO, VHOST_CONFIG,
"vring base idx:%d file:%d\n", msg->payload.state.index,
@@ -897,10 +1096,20 @@ vhost_user_get_vring_base(struct virtio_net *dev,
vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+ if (vq->callfd >= 0)
+ close(vq->callfd);
+
+ vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
if (dev->dequeue_zero_copy)
free_zmbufs(vq);
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = NULL;
+ if (vq_is_packed(dev)) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = NULL;
+ } else {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = NULL;
+ }
rte_free(vq->batch_copy_elems);
vq->batch_copy_elems = NULL;
@@ -917,16 +1126,24 @@ vhost_user_set_vring_enable(struct virtio_net *dev,
VhostUserMsg *msg)
{
int enable = (int)msg->payload.state.num;
+ int index = (int)msg->payload.state.index;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
RTE_LOG(INFO, VHOST_CONFIG,
"set queue enable: %d to qp idx: %d\n",
- enable, msg->payload.state.index);
+ enable, index);
+
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->set_vring_state)
+ vdpa_dev->ops->set_vring_state(dev->vid, index, enable);
if (dev->notify_ops->vring_state_changed)
dev->notify_ops->vring_state_changed(dev->vid,
- msg->payload.state.index, enable);
+ index, enable);
- dev->virtqueue[msg->payload.state.index]->enabled = enable;
+ dev->virtqueue[index]->enabled = enable;
return 0;
}
@@ -935,9 +1152,10 @@ static void
vhost_user_get_protocol_features(struct virtio_net *dev,
struct VhostUserMsg *msg)
{
- uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES;
+ uint64_t features, protocol_features;
rte_vhost_driver_get_features(dev->ifname, &features);
+ rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features);
/*
* REPLY_ACK protocol feature is only mandatory for now
@@ -983,6 +1201,15 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
size = msg->payload.log.mmap_size;
off = msg->payload.log.mmap_offset;
+
+ /* Don't allow mmap_offset to point outside the mmap region */
+ if (off > size) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n",
+ off, size);
+ return -1;
+ }
+
RTE_LOG(INFO, VHOST_CONFIG,
"log mmap size: %"PRId64", offset: %"PRId64"\n",
size, off);
@@ -991,7 +1218,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
* mmap from 0 to workaround a hugepage mmap bug: mmap will
* fail when offset is not page size aligned.
*/
- addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
if (addr == MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
@@ -1024,6 +1251,8 @@ static int
vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
{
uint8_t *mac = (uint8_t *)&msg->payload.u64;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
RTE_LOG(DEBUG, VHOST_CONFIG,
":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
@@ -1039,6 +1268,10 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
*/
rte_smp_wmb();
rte_atomic16_set(&dev->broadcast_rarp, 1);
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->migration_done)
+ vdpa_dev->ops->migration_done(dev->vid);
return 0;
}
@@ -1131,11 +1364,12 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
struct virtio_net *dev = *pdev;
struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
uint16_t i;
- uint64_t vva;
+ uint64_t vva, len;
switch (imsg->type) {
case VHOST_IOTLB_UPDATE:
- vva = qva_to_vva(dev, imsg->uaddr);
+ len = imsg->size;
+ vva = qva_to_vva(dev, imsg->uaddr, &len);
if (!vva)
return -1;
@@ -1143,7 +1377,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
struct vhost_virtqueue *vq = dev->virtqueue[i];
vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
- imsg->size, imsg->perm);
+ len, imsg->perm);
if (is_vring_iotlb_update(vq, imsg))
*pdev = dev = translate_ring_addresses(dev, i);
@@ -1200,13 +1434,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
}
static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num)
{
if (!msg)
return 0;
return send_fd_message(sockfd, (char *)msg,
- VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+ VHOST_USER_HDR_SIZE + msg->size, fds, fd_num);
}
static int
@@ -1220,7 +1454,23 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
msg->flags |= VHOST_USER_VERSION;
msg->flags |= VHOST_USER_REPLY_MASK;
- return send_vhost_message(sockfd, msg);
+ return send_vhost_message(sockfd, msg, NULL, 0);
+}
+
+static int
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
+ int *fds, int fd_num)
+{
+ int ret;
+
+ if (msg->flags & VHOST_USER_NEED_REPLY)
+ rte_spinlock_lock(&dev->slave_req_lock);
+
+ ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num);
+ if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
+ rte_spinlock_unlock(&dev->slave_req_lock);
+
+ return ret;
}
/*
@@ -1300,8 +1550,11 @@ vhost_user_msg_handler(int vid, int fd)
{
struct virtio_net *dev;
struct VhostUserMsg msg;
+ struct rte_vdpa_device *vdpa_dev;
+ int did = -1;
int ret;
int unlock_required = 0;
+ uint32_t skip_master = 0;
dev = get_device(vid);
if (dev == NULL)
@@ -1379,6 +1632,21 @@ vhost_user_msg_handler(int vid, int fd)
}
+ if (dev->extern_ops.pre_msg_handle) {
+ uint32_t need_reply;
+
+ ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
+ (void *)&msg, &need_reply, &skip_master);
+ if (ret < 0)
+ goto skip_to_reply;
+
+ if (need_reply)
+ send_vhost_reply(fd, &msg);
+
+ if (skip_master)
+ goto skip_to_post_handle;
+ }
+
switch (msg.request.master) {
case VHOST_USER_GET_FEATURES:
msg.payload.u64 = vhost_user_get_features(dev);
@@ -1407,7 +1675,7 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_SET_MEM_TABLE:
- ret = vhost_user_set_mem_table(dev, &msg);
+ ret = vhost_user_set_mem_table(&dev, &msg);
break;
case VHOST_USER_SET_LOG_BASE:
@@ -1452,7 +1720,7 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_GET_QUEUE_NUM:
- msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+ msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev);
msg.size = sizeof(msg.payload.u64);
send_vhost_reply(fd, &msg);
break;
@@ -1479,9 +1747,22 @@ vhost_user_msg_handler(int vid, int fd)
default:
ret = -1;
break;
+ }
+
+skip_to_post_handle:
+ if (dev->extern_ops.post_msg_handle) {
+ uint32_t need_reply;
+ ret = (*dev->extern_ops.post_msg_handle)(
+ dev->vid, (void *)&msg, &need_reply);
+ if (ret < 0)
+ goto skip_to_reply;
+
+ if (need_reply)
+ send_vhost_reply(fd, &msg);
}
+skip_to_reply:
if (unlock_required)
vhost_user_unlock_all_queue_pairs(dev);
@@ -1505,9 +1786,53 @@ vhost_user_msg_handler(int vid, int fd)
}
}
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && virtio_is_ready(dev) &&
+ !(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) &&
+ msg.request.master == VHOST_USER_SET_VRING_ENABLE) {
+ if (vdpa_dev->ops->dev_conf)
+ vdpa_dev->ops->dev_conf(dev->vid);
+ dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+ if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "(%d) software relay is used for vDPA, performance may be low.\n",
+ dev->vid);
+ }
+ }
+
return 0;
}
+static int process_slave_message_reply(struct virtio_net *dev,
+ const VhostUserMsg *msg)
+{
+ VhostUserMsg msg_reply;
+ int ret;
+
+ if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
+ return 0;
+
+ if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (msg_reply.request.slave != msg->request.slave) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Received unexpected msg type (%u), expected %u\n",
+ msg_reply.request.slave, msg->request.slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = msg_reply.payload.u64 ? -1 : 0;
+
+out:
+ rte_spinlock_unlock(&dev->slave_req_lock);
+ return ret;
+}
+
int
vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
@@ -1523,7 +1848,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
},
};
- ret = send_vhost_message(dev->slave_req_fd, &msg);
+ ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to send IOTLB miss message (%d)\n",
@@ -1533,3 +1858,101 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
return 0;
}
+
+static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
+ int index, int fd,
+ uint64_t offset,
+ uint64_t size)
+{
+ int *fdp = NULL;
+ size_t fd_num = 0;
+ int ret;
+ struct VhostUserMsg msg = {
+ .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
+ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
+ .size = sizeof(msg.payload.area),
+ .payload.area = {
+ .u64 = index & VHOST_USER_VRING_IDX_MASK,
+ .size = size,
+ .offset = offset,
+ },
+ };
+
+ if (fd < 0)
+ msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
+ else {
+ fdp = &fd;
+ fd_num = 1;
+ }
+
+ ret = send_vhost_slave_message(dev, &msg, fdp, fd_num);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to set host notifier (%d)\n", ret);
+ return ret;
+ }
+
+ return process_slave_message_reply(dev, &msg);
+}
+
+int vhost_user_host_notifier_ctrl(int vid, bool enable)
+{
+ struct virtio_net *dev;
+ struct rte_vdpa_device *vdpa_dev;
+ int vfio_device_fd, did, ret = 0;
+ uint64_t offset, size;
+ unsigned int i;
+
+ dev = get_device(vid);
+ if (!dev)
+ return -ENODEV;
+
+ did = dev->vdpa_dev_id;
+ if (did < 0)
+ return -EINVAL;
+
+ if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
+ !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)))
+ return -ENOTSUP;
+
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev)
+ return -ENODEV;
+
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP);
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP);
+
+ vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid);
+ if (vfio_device_fd < 0)
+ return -ENOTSUP;
+
+ if (enable) {
+ for (i = 0; i < dev->nr_vring; i++) {
+ if (vdpa_dev->ops->get_notify_area(vid, i, &offset,
+ &size) < 0) {
+ ret = -ENOTSUP;
+ goto disable;
+ }
+
+ if (vhost_user_slave_set_vring_host_notifier(dev, i,
+ vfio_device_fd, offset, size) < 0) {
+ ret = -EFAULT;
+ goto disable;
+ }
+ }
+ } else {
+disable:
+ for (i = 0; i < dev->nr_vring; i++) {
+ vhost_user_slave_set_vring_host_notifier(dev, i, -1,
+ 0, 0);
+ }
+ }
+
+ return ret;
+}
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index d4bd604b..42166adf 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
#ifndef _VHOST_NET_USER_H
@@ -14,19 +14,15 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
-#define VHOST_USER_PROTOCOL_F_MQ 0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
-#define VHOST_USER_PROTOCOL_F_RARP 2
-#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
-#define VHOST_USER_PROTOCOL_F_NET_MTU 4
-#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
-
#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
- (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ))
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@@ -52,12 +48,15 @@ typedef enum VhostUserRequest {
VHOST_USER_NET_SET_MTU = 20,
VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
- VHOST_USER_MAX
+ VHOST_USER_CRYPTO_CREATE_SESS = 26,
+ VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+ VHOST_USER_MAX = 28
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_NONE = 0,
VHOST_USER_SLAVE_IOTLB_MSG = 1,
+ VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -79,10 +78,40 @@ typedef struct VhostUserLog {
uint64_t mmap_offset;
} VhostUserLog;
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH 512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH 64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+ int64_t session_id;
+ uint32_t op_code;
+ uint32_t cipher_algo;
+ uint32_t cipher_key_len;
+ uint32_t hash_algo;
+ uint32_t digest_len;
+ uint32_t auth_key_len;
+ uint32_t aad_len;
+ uint8_t op_type;
+ uint8_t dir;
+ uint8_t hash_mode;
+ uint8_t chaining_dir;
+ uint8_t *ciphe_key;
+ uint8_t *auth_key;
+ uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+ uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+ uint64_t u64;
+ uint64_t size;
+ uint64_t offset;
+} VhostUserVringArea;
+
typedef struct VhostUserMsg {
union {
- VhostUserRequest master;
- VhostUserSlaveRequest slave;
+ uint32_t master; /* a VhostUserRequest value */
+ uint32_t slave; /* a VhostUserSlaveRequest value */
} request;
#define VHOST_USER_VERSION_MASK 0x3
@@ -99,6 +128,8 @@ typedef struct VhostUserMsg {
VhostUserMemory memory;
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
+ VhostUserCryptoSessionParam crypto_session;
+ VhostUserVringArea area;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
@@ -112,6 +143,7 @@ typedef struct VhostUserMsg {
/* vhost_user.c */
int vhost_user_msg_handler(int vid, int fd);
int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
+int vhost_user_host_notifier_ctrl(int vid, bool enable);
/* socket.c */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
diff --git a/lib/librte_vhost/virtio_crypto.h b/lib/librte_vhost/virtio_crypto.h
new file mode 100644
index 00000000..e3b93573
--- /dev/null
+++ b/lib/librte_vhost/virtio_crypto.h
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 HUAWEI TECHNOLOGIES CO., LTD.
+ */
+
+#ifndef _VIRTIO_CRYPTO_H
+#define _VIRTIO_CRYPTO_H
+
+#define VIRTIO_CRYPTO_SERVICE_CIPHER 0
+#define VIRTIO_CRYPTO_SERVICE_HASH 1
+#define VIRTIO_CRYPTO_SERVICE_MAC 2
+#define VIRTIO_CRYPTO_SERVICE_AEAD 3
+
+#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op))
+
+struct virtio_crypto_ctrl_header {
+#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02)
+#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03)
+#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02)
+#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03)
+#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02)
+#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03)
+#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02)
+#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03)
+ uint32_t opcode;
+ uint32_t algo;
+ uint32_t flag;
+ /* data virtqueue id */
+ uint32_t queue_id;
+};
+
+struct virtio_crypto_cipher_session_para {
+#define VIRTIO_CRYPTO_NO_CIPHER 0
+#define VIRTIO_CRYPTO_CIPHER_ARC4 1
+#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2
+#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3
+#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4
+#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5
+#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6
+#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7
+#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8
+#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9
+#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10
+#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11
+#define VIRTIO_CRYPTO_CIPHER_AES_F8 12
+#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13
+#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14
+ uint32_t algo;
+ /* length of key */
+ uint32_t keylen;
+
+#define VIRTIO_CRYPTO_OP_ENCRYPT 1
+#define VIRTIO_CRYPTO_OP_DECRYPT 2
+ /* encrypt or decrypt */
+ uint32_t op;
+ uint32_t padding;
+};
+
+struct virtio_crypto_session_input {
+ /* Device-writable part */
+ uint64_t session_id;
+ uint32_t status;
+ uint32_t padding;
+};
+
+struct virtio_crypto_cipher_session_req {
+ struct virtio_crypto_cipher_session_para para;
+ uint8_t padding[32];
+};
+
+struct virtio_crypto_hash_session_para {
+#define VIRTIO_CRYPTO_NO_HASH 0
+#define VIRTIO_CRYPTO_HASH_MD5 1
+#define VIRTIO_CRYPTO_HASH_SHA1 2
+#define VIRTIO_CRYPTO_HASH_SHA_224 3
+#define VIRTIO_CRYPTO_HASH_SHA_256 4
+#define VIRTIO_CRYPTO_HASH_SHA_384 5
+#define VIRTIO_CRYPTO_HASH_SHA_512 6
+#define VIRTIO_CRYPTO_HASH_SHA3_224 7
+#define VIRTIO_CRYPTO_HASH_SHA3_256 8
+#define VIRTIO_CRYPTO_HASH_SHA3_384 9
+#define VIRTIO_CRYPTO_HASH_SHA3_512 10
+#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11
+#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12
+ uint32_t algo;
+ /* hash result length */
+ uint32_t hash_result_len;
+ uint8_t padding[8];
+};
+
+struct virtio_crypto_hash_create_session_req {
+ struct virtio_crypto_hash_session_para para;
+ uint8_t padding[40];
+};
+
+struct virtio_crypto_mac_session_para {
+#define VIRTIO_CRYPTO_NO_MAC 0
+#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1
+#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2
+#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3
+#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4
+#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5
+#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6
+#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25
+#define VIRTIO_CRYPTO_MAC_CMAC_AES 26
+#define VIRTIO_CRYPTO_MAC_KASUMI_F9 27
+#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28
+#define VIRTIO_CRYPTO_MAC_GMAC_AES 41
+#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42
+#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49
+#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50
+#define VIRTIO_CRYPTO_MAC_XCBC_AES 53
+ uint32_t algo;
+ /* hash result length */
+ uint32_t hash_result_len;
+ /* length of authenticated key */
+ uint32_t auth_key_len;
+ uint32_t padding;
+};
+
+struct virtio_crypto_mac_create_session_req {
+ struct virtio_crypto_mac_session_para para;
+ uint8_t padding[40];
+};
+
+struct virtio_crypto_aead_session_para {
+#define VIRTIO_CRYPTO_NO_AEAD 0
+#define VIRTIO_CRYPTO_AEAD_GCM 1
+#define VIRTIO_CRYPTO_AEAD_CCM 2
+#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3
+ uint32_t algo;
+ /* length of key */
+ uint32_t key_len;
+ /* hash result length */
+ uint32_t hash_result_len;
+ /* length of the additional authenticated data (AAD) in bytes */
+ uint32_t aad_len;
+ /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */
+ uint32_t op;
+ uint32_t padding;
+};
+
+struct virtio_crypto_aead_create_session_req {
+ struct virtio_crypto_aead_session_para para;
+ uint8_t padding[32];
+};
+
+struct virtio_crypto_alg_chain_session_para {
+#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1
+#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2
+ uint32_t alg_chain_order;
+/* Plain hash */
+#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1
+/* Authenticated hash (mac) */
+#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2
+/* Nested hash */
+#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3
+ uint32_t hash_mode;
+ struct virtio_crypto_cipher_session_para cipher_param;
+ union {
+ struct virtio_crypto_hash_session_para hash_param;
+ struct virtio_crypto_mac_session_para mac_param;
+ uint8_t padding[16];
+ } u;
+ /* length of the additional authenticated data (AAD) in bytes */
+ uint32_t aad_len;
+ uint32_t padding;
+};
+
+struct virtio_crypto_alg_chain_session_req {
+ struct virtio_crypto_alg_chain_session_para para;
+};
+
+struct virtio_crypto_sym_create_session_req {
+ union {
+ struct virtio_crypto_cipher_session_req cipher;
+ struct virtio_crypto_alg_chain_session_req chain;
+ uint8_t padding[48];
+ } u;
+
+ /* Device-readable part */
+
+/* No operation */
+#define VIRTIO_CRYPTO_SYM_OP_NONE 0
+/* Cipher only operation on the data */
+#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1
+/*
+ * Chain any cipher with any hash or mac operation. The order
+ * depends on the value of alg_chain_order param
+ */
+#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2
+ uint32_t op_type;
+ uint32_t padding;
+};
+
+struct virtio_crypto_destroy_session_req {
+ /* Device-readable part */
+ uint64_t session_id;
+ uint8_t padding[48];
+};
+
+/* The request of the control virtqueue's packet */
+struct virtio_crypto_op_ctrl_req {
+ struct virtio_crypto_ctrl_header header;
+
+ union {
+ struct virtio_crypto_sym_create_session_req
+ sym_create_session;
+ struct virtio_crypto_hash_create_session_req
+ hash_create_session;
+ struct virtio_crypto_mac_create_session_req
+ mac_create_session;
+ struct virtio_crypto_aead_create_session_req
+ aead_create_session;
+ struct virtio_crypto_destroy_session_req
+ destroy_session;
+ uint8_t padding[56];
+ } u;
+};
+
+struct virtio_crypto_op_header {
+#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00)
+#define VIRTIO_CRYPTO_CIPHER_DECRYPT \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01)
+#define VIRTIO_CRYPTO_HASH \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00)
+#define VIRTIO_CRYPTO_MAC \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00)
+#define VIRTIO_CRYPTO_AEAD_ENCRYPT \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00)
+#define VIRTIO_CRYPTO_AEAD_DECRYPT \
+ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01)
+ uint32_t opcode;
+ /* algo should be service-specific algorithms */
+ uint32_t algo;
+ /* session_id returned by the device when the session was created */
+ uint64_t session_id;
+ /* control flag to control the request */
+ uint32_t flag;
+ uint32_t padding;
+};
+
+struct virtio_crypto_cipher_para {
+ /*
+ * Byte Length of valid IV/Counter
+ *
+ * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for
+ * SNOW3G in UEA2 mode, this is the length of the IV (which
+ * must be the same as the block length of the cipher).
+ * For block ciphers in CTR mode, this is the length of the counter
+ * (which must be the same as the block length of the cipher).
+ * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007.
+ *
+ * The IV/Counter will be updated after every partial cryptographic
+ * operation.
+ */
+ uint32_t iv_len;
+ /* length of source data */
+ uint32_t src_data_len;
+ /* length of dst data */
+ uint32_t dst_data_len;
+ uint32_t padding;
+};
+
+struct virtio_crypto_hash_para {
+ /* length of source data */
+ uint32_t src_data_len;
+ /* hash result length */
+ uint32_t hash_result_len;
+};
+
+struct virtio_crypto_mac_para {
+ struct virtio_crypto_hash_para hash;
+};
+
+struct virtio_crypto_aead_para {
+ /*
+ * Byte Length of valid IV data pointed to by the below iv_addr
+ * parameter.
+ *
+ * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which
+ * case iv_addr points to J0.
+ * For CCM mode, this is the length of the nonce, which can be in the
+ * range 7 to 13 inclusive.
+ */
+ uint32_t iv_len;
+ /* length of additional auth data */
+ uint32_t aad_len;
+ /* length of source data */
+ uint32_t src_data_len;
+ /* length of dst data */
+ uint32_t dst_data_len;
+};
+
+struct virtio_crypto_cipher_data_req {
+ /* Device-readable part */
+ struct virtio_crypto_cipher_para para;
+ uint8_t padding[24];
+};
+
+struct virtio_crypto_hash_data_req {
+ /* Device-readable part */
+ struct virtio_crypto_hash_para para;
+ uint8_t padding[40];
+};
+
+struct virtio_crypto_mac_data_req {
+ /* Device-readable part */
+ struct virtio_crypto_mac_para para;
+ uint8_t padding[40];
+};
+
+struct virtio_crypto_alg_chain_data_para {
+ uint32_t iv_len;
+ /* Length of source data */
+ uint32_t src_data_len;
+ /* Length of destination data */
+ uint32_t dst_data_len;
+ /* Starting point for cipher processing in source data */
+ uint32_t cipher_start_src_offset;
+ /* Length of the source data that the cipher will be computed on */
+ uint32_t len_to_cipher;
+ /* Starting point for hash processing in source data */
+ uint32_t hash_start_src_offset;
+ /* Length of the source data that the hash will be computed on */
+ uint32_t len_to_hash;
+ /* Length of the additional auth data */
+ uint32_t aad_len;
+ /* Length of the hash result */
+ uint32_t hash_result_len;
+ uint32_t reserved;
+};
+
+struct virtio_crypto_alg_chain_data_req {
+ /* Device-readable part */
+ struct virtio_crypto_alg_chain_data_para para;
+};
+
+struct virtio_crypto_sym_data_req {
+ union {
+ struct virtio_crypto_cipher_data_req cipher;
+ struct virtio_crypto_alg_chain_data_req chain;
+ uint8_t padding[40];
+ } u;
+
+ /* See above VIRTIO_CRYPTO_SYM_OP_* */
+ uint32_t op_type;
+ uint32_t padding;
+};
+
+struct virtio_crypto_aead_data_req {
+ /* Device-readable part */
+ struct virtio_crypto_aead_para para;
+ uint8_t padding[32];
+};
+
+/* The request of the data virtqueue's packet */
+struct virtio_crypto_op_data_req {
+ struct virtio_crypto_op_header header;
+
+ union {
+ struct virtio_crypto_sym_data_req sym_req;
+ struct virtio_crypto_hash_data_req hash_req;
+ struct virtio_crypto_mac_data_req mac_req;
+ struct virtio_crypto_aead_data_req aead_req;
+ uint8_t padding[48];
+ } u;
+};
+
+#define VIRTIO_CRYPTO_OK 0
+#define VIRTIO_CRYPTO_ERR 1
+#define VIRTIO_CRYPTO_BADMSG 2
+#define VIRTIO_CRYPTO_NOTSUPP 3
+#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */
+
+/* The accelerator hardware is ready */
+#define VIRTIO_CRYPTO_S_HW_READY (1 << 0)
+
+struct virtio_crypto_config {
+ /* See VIRTIO_CRYPTO_S_* above */
+ uint32_t status;
+
+ /*
+ * Maximum number of data queues
+ */
+ uint32_t max_dataqueues;
+
+ /*
+ * Specifies the services mask which the device supports,
+ * see VIRTIO_CRYPTO_SERVICE_* above
+ */
+ uint32_t crypto_services;
+
+ /* Detailed algorithms mask */
+ uint32_t cipher_algo_l;
+ uint32_t cipher_algo_h;
+ uint32_t hash_algo;
+ uint32_t mac_algo_l;
+ uint32_t mac_algo_h;
+ uint32_t aead_algo;
+ /* Maximum length of cipher key */
+ uint32_t max_cipher_key_len;
+ /* Maximum length of authenticated key */
+ uint32_t max_auth_key_len;
+ uint32_t reserve;
+ /* Maximum size of each crypto request's content */
+ uint64_t max_size;
+};
+
+struct virtio_crypto_inhdr {
+ /* See VIRTIO_CRYPTO_* above */
+ uint8_t status;
+};
+#endif /* _VIRTIO_CRYPTO_H */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 700aca7c..99c7afc8 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -16,6 +16,7 @@
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
+#include <rte_malloc.h>
#include "iotlb.h"
#include "vhost.h"
@@ -24,60 +25,174 @@
#define MAX_BATCH_LEN 256
+static __rte_always_inline bool
+rxvq_is_mergeable(struct virtio_net *dev)
+{
+ return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
+}
+
static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
+static __rte_always_inline void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t desc_addr, uint64_t desc_len)
+{
+ void *idesc;
+ uint64_t src, dst;
+ uint64_t len, remain = desc_len;
+
+ idesc = rte_malloc(__func__, desc_len, 0);
+ if (unlikely(!idesc))
+ return 0;
+
+ dst = (uint64_t)(uintptr_t)idesc;
+
+ while (remain) {
+ len = remain;
+ src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !len)) {
+ rte_free(idesc);
+ return 0;
+ }
+
+ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ desc_addr += len;
+ }
+
+ return idesc;
+}
+
static __rte_always_inline void
-do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint16_t to, uint16_t from, uint16_t size)
+free_ind_table(void *idesc)
+{
+ rte_free(idesc);
+}
+
+static __rte_always_inline void
+do_flush_shadow_used_ring_split(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint16_t to, uint16_t from, uint16_t size)
{
rte_memcpy(&vq->used->ring[to],
- &vq->shadow_used_ring[from],
+ &vq->shadow_used_split[from],
size * sizeof(struct vring_used_elem));
- vhost_log_used_vring(dev, vq,
+ vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[to]),
size * sizeof(struct vring_used_elem));
}
static __rte_always_inline void
-flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
if (used_idx + vq->shadow_used_idx <= vq->size) {
- do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
vq->shadow_used_idx);
} else {
uint16_t size;
/* update used ring interval [used_idx, vq->size] */
size = vq->size - used_idx;
- do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
/* update the left half used ring interval [0, left_size] */
- do_flush_shadow_used_ring(dev, vq, 0, size,
+ do_flush_shadow_used_ring_split(dev, vq, 0, size,
vq->shadow_used_idx - size);
}
vq->last_used_idx += vq->shadow_used_idx;
rte_smp_wmb();
+ vhost_log_cache_sync(dev, vq);
+
*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+ vq->shadow_used_idx = 0;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
}
static __rte_always_inline void
-update_shadow_used_ring(struct vhost_virtqueue *vq,
+update_shadow_used_ring_split(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint16_t len)
{
uint16_t i = vq->shadow_used_idx++;
- vq->shadow_used_ring[i].id = desc_idx;
- vq->shadow_used_ring[i].len = len;
+ vq->shadow_used_split[i].id = desc_idx;
+ vq->shadow_used_split[i].len = len;
+}
+
+static __rte_always_inline void
+flush_shadow_used_ring_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq)
+{
+ int i;
+ uint16_t used_idx = vq->last_used_idx;
+
+ /* Split loop in two to save memory barriers */
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
+ vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
+
+ used_idx += vq->shadow_used_packed[i].count;
+ if (used_idx >= vq->size)
+ used_idx -= vq->size;
+ }
+
+ rte_smp_wmb();
+
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ uint16_t flags;
+
+ if (vq->shadow_used_packed[i].len)
+ flags = VRING_DESC_F_WRITE;
+ else
+ flags = 0;
+
+ if (vq->used_wrap_counter) {
+ flags |= VRING_DESC_F_USED;
+ flags |= VRING_DESC_F_AVAIL;
+ } else {
+ flags &= ~VRING_DESC_F_USED;
+ flags &= ~VRING_DESC_F_AVAIL;
+ }
+
+ vq->desc_packed[vq->last_used_idx].flags = flags;
+
+ vhost_log_cache_used_vring(dev, vq,
+ vq->last_used_idx *
+ sizeof(struct vring_packed_desc),
+ sizeof(struct vring_packed_desc));
+
+ vq->last_used_idx += vq->shadow_used_packed[i].count;
+ if (vq->last_used_idx >= vq->size) {
+ vq->used_wrap_counter ^= 1;
+ vq->last_used_idx -= vq->size;
+ }
+ }
+
+ rte_smp_wmb();
+ vq->shadow_used_idx = 0;
+ vhost_log_cache_sync(dev, vq);
+}
+
+static __rte_always_inline void
+update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+ uint16_t desc_idx, uint16_t len, uint16_t count)
+{
+ uint16_t i = vq->shadow_used_idx++;
+
+ vq->shadow_used_packed[i].id = desc_idx;
+ vq->shadow_used_packed[i].len = len;
+ vq->shadow_used_packed[i].count = count;
}
static inline void
@@ -89,9 +204,11 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
for (i = 0; i < count; i++) {
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
- vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+ vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
+
+ vq->batch_copy_nb_elems = 0;
}
static inline void
@@ -103,6 +220,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
for (i = 0; i < count; i++)
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+
+ vq->batch_copy_nb_elems = 0;
}
/* avoid write operation when necessary, to lessen cache issues */
@@ -111,7 +230,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
(var) = (val); \
} while (0)
-static void
+static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
@@ -173,273 +292,268 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
static __rte_always_inline int
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, struct rte_mbuf *m,
- uint16_t desc_idx, uint32_t size)
+map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
{
- uint32_t desc_avail, desc_offset;
- uint32_t mbuf_avail, mbuf_offset;
- uint32_t cpy_len;
- struct vring_desc *desc;
- uint64_t desc_addr;
- /* A counter to avoid desc dead loop chain */
- uint16_t nr_desc = 1;
- struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
- int error = 0;
+ uint16_t vec_id = *vec_idx;
- desc = &descs[desc_idx];
- desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
- desc->len, VHOST_ACCESS_RW);
- /*
- * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
- * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
- * otherwise stores offset on the stack instead of in a register.
- */
- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
- error = -1;
- goto out;
+ while (desc_len) {
+ uint64_t desc_addr;
+ uint64_t desc_chunck_len = desc_len;
+
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
+ return -1;
+
+ desc_addr = vhost_iova_to_vva(dev, vq,
+ desc_iova,
+ &desc_chunck_len,
+ perm);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ buf_vec[vec_id].buf_iova = desc_iova;
+ buf_vec[vec_id].buf_addr = desc_addr;
+ buf_vec[vec_id].buf_len = desc_chunck_len;
+
+ desc_len -= desc_chunck_len;
+ desc_iova += desc_chunck_len;
+ vec_id++;
}
+ *vec_idx = vec_id;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
+ return 0;
+}
- virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr);
- vhost_log_write(dev, desc->addr, dev->vhost_hlen);
- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+static __rte_always_inline int
+fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t avail_idx, uint16_t *vec_idx,
+ struct buf_vector *buf_vec, uint16_t *desc_chain_head,
+ uint16_t *desc_chain_len, uint8_t perm)
+{
+ uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
+ uint16_t vec_id = *vec_idx;
+ uint32_t len = 0;
+ uint64_t dlen;
+ struct vring_desc *descs = vq->desc;
+ struct vring_desc *idesc = NULL;
- desc_offset = dev->vhost_hlen;
- desc_avail = desc->len - dev->vhost_hlen;
+ *desc_chain_head = idx;
- mbuf_avail = rte_pktmbuf_data_len(m);
- mbuf_offset = 0;
- while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current mbuf, fetch next */
- if (mbuf_avail == 0) {
- m = m->next;
+ if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
+ dlen = vq->desc[idx].len;
+ descs = (struct vring_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
+ &dlen,
+ VHOST_ACCESS_RO);
+ if (unlikely(!descs))
+ return -1;
- mbuf_offset = 0;
- mbuf_avail = rte_pktmbuf_data_len(m);
- }
+ if (unlikely(dlen < vq->desc[idx].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev, vq,
+ vq->desc[idx].addr, vq->desc[idx].len);
+ if (unlikely(!idesc))
+ return -1;
- /* done with current desc buf, fetch next */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
- /* Room in vring buffer is not enough */
- error = -1;
- goto out;
- }
- if (unlikely(desc->next >= size || ++nr_desc > size)) {
- error = -1;
- goto out;
- }
+ descs = idesc;
+ }
- desc = &descs[desc->next];
- desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
- desc->len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ idx = 0;
+ }
- desc_offset = 0;
- desc_avail = desc->len;
+ while (1) {
+ if (unlikely(idx >= vq->size)) {
+ free_ind_table(idesc);
+ return -1;
}
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
- } else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr = desc->addr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ len += descs[idx].len;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[idx].addr, descs[idx].len,
+ perm))) {
+ free_ind_table(idesc);
+ return -1;
}
- mbuf_avail -= cpy_len;
- mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
+ if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
+ break;
+
+ idx = descs[idx].next;
}
-out:
- vq->batch_copy_nb_elems = copy_nb;
+ *desc_chain_len = len;
+ *vec_idx = vec_id;
- return error;
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
+
+ return 0;
}
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are successfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
+/*
+ * Returns -1 on fail, 0 on success
*/
-static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint32_t count)
+static inline int
+reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t size, struct buf_vector *buf_vec,
+ uint16_t *num_buffers, uint16_t avail_head,
+ uint16_t *nr_vec)
{
- struct vhost_virtqueue *vq;
- uint16_t avail_idx, free_entries, start_idx;
- uint16_t desc_indexes[MAX_PKT_BURST];
- struct vring_desc *descs;
- uint16_t used_idx;
- uint32_t i, sz;
+ uint16_t cur_idx;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
- LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
+ uint16_t head_idx = 0;
+ uint16_t len = 0;
- vq = dev->virtqueue[queue_id];
+ *num_buffers = 0;
+ cur_idx = vq->last_avail_idx;
- rte_spinlock_lock(&vq->access_lock);
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ while (size > 0) {
+ if (unlikely(cur_idx == avail_head))
+ return -1;
+ /*
+ * if we tried all available ring items, and still
+ * can't get enough buf, it means something abnormal
+ * happened.
+ */
+ if (unlikely(++tries > max_tries))
+ return -1;
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
+ &vec_idx, buf_vec,
+ &head_idx, &len,
+ VHOST_ACCESS_RW) < 0))
+ return -1;
+ len = RTE_MIN(len, size);
+ update_shadow_used_ring_split(vq, head_idx, len);
+ size -= len;
- if (unlikely(vq->access_ok == 0)) {
- if (unlikely(vring_translate(dev, vq) < 0)) {
- count = 0;
- goto out;
- }
+ cur_idx++;
+ *num_buffers += 1;
}
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- start_idx = vq->last_used_idx;
- free_entries = avail_idx - start_idx;
- count = RTE_MIN(count, free_entries);
- count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
- if (count == 0)
- goto out;
+ *nr_vec = vec_idx;
- LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
- dev->vid, start_idx, start_idx + count);
+ return 0;
+}
- vq->batch_copy_nb_elems = 0;
+static __rte_always_inline int
+fill_vec_buf_packed_indirect(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct vring_packed_desc *desc, uint16_t *vec_idx,
+ struct buf_vector *buf_vec, uint16_t *len, uint8_t perm)
+{
+ uint16_t i;
+ uint32_t nr_descs;
+ uint16_t vec_id = *vec_idx;
+ uint64_t dlen;
+ struct vring_packed_desc *descs, *idescs = NULL;
+
+ dlen = desc->len;
+ descs = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
+ if (unlikely(!descs))
+ return -1;
+
+ if (unlikely(dlen < desc->len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
+ if (unlikely(!idescs))
+ return -1;
- /* Retrieve all of the desc indexes first to avoid caching issues. */
- rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
- for (i = 0; i < count; i++) {
- used_idx = (start_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[used_idx];
- vq->used->ring[used_idx].id = desc_indexes[i];
- vq->used->ring[used_idx].len = pkts[i]->pkt_len +
- dev->vhost_hlen;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
+ descs = idescs;
}
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
- for (i = 0; i < count; i++) {
- uint16_t desc_idx = desc_indexes[i];
- int err;
-
- if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
- descs = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev,
- vq, vq->desc[desc_idx].addr,
- vq->desc[desc_idx].len,
- VHOST_ACCESS_RO);
- if (unlikely(!descs)) {
- count = i;
- break;
- }
-
- desc_idx = 0;
- sz = vq->desc[desc_idx].len / sizeof(*descs);
- } else {
- descs = vq->desc;
- sz = vq->size;
- }
+ nr_descs = desc->len / sizeof(struct vring_packed_desc);
+ if (unlikely(nr_descs >= vq->size)) {
+ free_ind_table(idescs);
+ return -1;
+ }
- err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
- if (unlikely(err)) {
- count = i;
- break;
+ for (i = 0; i < nr_descs; i++) {
+ if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+ free_ind_table(idescs);
+ return -1;
}
- if (i + 1 < count)
- rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+ *len += descs[i].len;
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[i].addr, descs[i].len,
+ perm)))
+ return -1;
}
+ *vec_idx = vec_id;
- do_data_copy_enqueue(dev, vq);
-
- rte_smp_wmb();
-
- *(volatile uint16_t *)&vq->used->idx += count;
- vq->last_used_idx += count;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
-
- vhost_vring_call(dev, vq);
-out:
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_unlock(vq);
-
-out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
+ if (unlikely(!!idescs))
+ free_ind_table(idescs);
- return count;
+ return 0;
}
static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t avail_idx, uint32_t *vec_idx,
- struct buf_vector *buf_vec, uint16_t *desc_chain_head,
- uint16_t *desc_chain_len)
+fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint16_t avail_idx, uint16_t *desc_count,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint16_t *buf_id, uint16_t *len, uint8_t perm)
{
- uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
- uint32_t vec_id = *vec_idx;
- uint32_t len = 0;
- struct vring_desc *descs = vq->desc;
+ bool wrap_counter = vq->avail_wrap_counter;
+ struct vring_packed_desc *descs = vq->desc_packed;
+ uint16_t vec_id = *vec_idx;
- *desc_chain_head = idx;
+ if (avail_idx < vq->last_avail_idx)
+ wrap_counter ^= 1;
- if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
- descs = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
- vq->desc[idx].len,
- VHOST_ACCESS_RO);
- if (unlikely(!descs))
- return -1;
+ if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
+ return -1;
- idx = 0;
- }
+ *desc_count = 0;
while (1) {
- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
return -1;
- len += descs[idx].len;
- buf_vec[vec_id].buf_addr = descs[idx].addr;
- buf_vec[vec_id].buf_len = descs[idx].len;
- buf_vec[vec_id].desc_idx = idx;
- vec_id++;
+ *desc_count += 1;
+ *buf_id = descs[avail_idx].id;
- if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
+ if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
+ if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
+ &descs[avail_idx],
+ &vec_id, buf_vec,
+ len, perm) < 0))
+ return -1;
+ } else {
+ *len += descs[avail_idx].len;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[avail_idx].addr,
+ descs[avail_idx].len,
+ perm)))
+ return -1;
+ }
+
+ if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
break;
- idx = descs[idx].next;
+ if (++avail_idx >= vq->size) {
+ avail_idx -= vq->size;
+ wrap_counter ^= 1;
+ }
}
- *desc_chain_len = len;
*vec_idx = vec_id;
return 0;
@@ -449,61 +563,74 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* Returns -1 on fail, 0 on success
*/
static inline int
-reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t size, struct buf_vector *buf_vec,
- uint16_t *num_buffers, uint16_t avail_head)
+ uint16_t *nr_vec, uint16_t *num_buffers,
+ uint16_t *nr_descs)
{
- uint16_t cur_idx;
- uint32_t vec_idx = 0;
- uint16_t tries = 0;
+ uint16_t avail_idx;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
- uint16_t head_idx = 0;
+ uint16_t buf_id = 0;
uint16_t len = 0;
+ uint16_t desc_count;
*num_buffers = 0;
- cur_idx = vq->last_avail_idx;
+ avail_idx = vq->last_avail_idx;
+
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
while (size > 0) {
- if (unlikely(cur_idx == avail_head))
+ /*
+ * if we tried all available ring items, and still
+ * can't get enough buf, it means something abnormal
+ * happened.
+ */
+ if (unlikely(++tries > max_tries))
return -1;
- if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
- &head_idx, &len) < 0))
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ avail_idx, &desc_count,
+ buf_vec, &vec_idx,
+ &buf_id, &len,
+ VHOST_ACCESS_RO) < 0))
return -1;
+
len = RTE_MIN(len, size);
- update_shadow_used_ring(vq, head_idx, len);
+ update_shadow_used_ring_packed(vq, buf_id, len, desc_count);
size -= len;
- cur_idx++;
- tries++;
- *num_buffers += 1;
+ avail_idx += desc_count;
+ if (avail_idx >= vq->size)
+ avail_idx -= vq->size;
- /*
- * if we tried all available ring items, and still
- * can't get enough buf, it means something abnormal
- * happened.
- */
- if (unlikely(tries >= vq->size))
- return -1;
+ *nr_descs += desc_count;
+ *num_buffers += 1;
}
+ *nr_vec = vec_idx;
+
return 0;
}
static __rte_always_inline int
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
- uint16_t num_buffers)
+ uint16_t nr_vec, uint16_t num_buffers)
{
uint32_t vec_idx = 0;
- uint64_t desc_addr;
uint32_t mbuf_offset, mbuf_avail;
- uint32_t desc_offset, desc_avail;
+ uint32_t buf_offset, buf_avail;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t cpy_len;
- uint64_t hdr_addr, hdr_phys_addr;
+ uint64_t hdr_addr;
struct rte_mbuf *hdr_mbuf;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
+ struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
int error = 0;
if (unlikely(m == NULL)) {
@@ -511,45 +638,61 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
goto out;
}
- desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr,
- buf_vec[vec_idx].buf_len,
- VHOST_ACCESS_RW);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ if (nr_vec > 1)
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
hdr_mbuf = m;
- hdr_addr = desc_addr;
- hdr_phys_addr = buf_vec[vec_idx].buf_addr;
- rte_prefetch0((void *)(uintptr_t)hdr_addr);
+ hdr_addr = buf_addr;
+ if (unlikely(buf_len < dev->vhost_hlen))
+ hdr = &tmp_hdr;
+ else
+ hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
- LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
- desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
- desc_offset = dev->vhost_hlen;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
+ } else {
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_len - dev->vhost_hlen;
+ }
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current desc buf, get the next one */
- if (desc_avail == 0) {
+ /* done with current buf, get the next one */
+ if (buf_avail == 0) {
vec_idx++;
- desc_addr =
- vhost_iova_to_vva(dev, vq,
- buf_vec[vec_idx].buf_addr,
- buf_vec[vec_idx].buf_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
+ if (unlikely(vec_idx >= nr_vec)) {
error = -1;
goto out;
}
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)desc_addr);
- desc_offset = 0;
- desc_avail = buf_vec[vec_idx].buf_len;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ /* Prefetch next buffer address. */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
+ buf_offset = 0;
+ buf_avail = buf_len;
}
/* done with current mbuf, get the next one */
@@ -561,129 +704,221 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
if (hdr_addr) {
- struct virtio_net_hdr_mrg_rxbuf *hdr;
-
- hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)
- hdr_addr;
virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
- ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
-
- vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
- PRINT_PACKET(dev, (uintptr_t)hdr_addr,
- dev->vhost_hlen, 0);
+ if (rxvq_is_mergeable(dev))
+ ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
+ num_buffers);
+
+ if (unlikely(hdr == &tmp_hdr)) {
+ uint64_t len;
+ uint64_t remain = dev->vhost_hlen;
+ uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
+ uint64_t iova = buf_vec[0].buf_iova;
+ uint16_t hdr_vec_idx = 0;
+
+ while (remain) {
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ dst = buf_vec[hdr_vec_idx].buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src,
+ len);
+
+ PRINT_PACKET(dev, (uintptr_t)dst,
+ (uint32_t)len, 0);
+ vhost_log_cache_write(dev, vq,
+ iova, len);
+
+ remain -= len;
+ iova += len;
+ src += len;
+ hdr_vec_idx++;
+ }
+ } else {
+ PRINT_PACKET(dev, (uintptr_t)hdr_addr,
+ dev->vhost_hlen, 0);
+ vhost_log_cache_write(dev, vq,
+ buf_vec[0].buf_iova,
+ dev->vhost_hlen);
+ }
hdr_addr = 0;
}
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ vq->batch_copy_nb_elems >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_write(dev,
- buf_vec[vec_idx].buf_addr + desc_offset,
- cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ vhost_log_cache_write(dev, vq, buf_iova + buf_offset,
+ cpy_len);
+ PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
cpy_len, 0);
} else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
+ batch_copy[vq->batch_copy_nb_elems].dst =
+ (void *)((uintptr_t)(buf_addr + buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].src =
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr =
- buf_vec[vec_idx].buf_addr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].log_addr =
+ buf_iova + buf_offset;
+ batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
+ vq->batch_copy_nb_elems++;
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
}
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
static __rte_always_inline uint32_t
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
+virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count)
{
- struct vhost_virtqueue *vq;
uint32_t pkt_idx = 0;
uint16_t num_buffers;
struct buf_vector buf_vec[BUF_VECTOR_MAX];
uint16_t avail_head;
- LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ avail_head = *((volatile uint16_t *)&vq->avail->idx);
- vq = dev->virtqueue[queue_id];
+ for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+ uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
- rte_spinlock_lock(&vq->access_lock);
+ if (unlikely(reserve_avail_buf_split(dev, vq,
+ pkt_len, buf_vec, &num_buffers,
+ avail_head, &nr_vec) < 0)) {
+ VHOST_LOG_DEBUG(VHOST_DATA,
+ "(%d) failed to get enough desc from vring\n",
+ dev->vid);
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+ dev->vid, vq->last_avail_idx,
+ vq->last_avail_idx + num_buffers);
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
- if (count == 0)
- goto out;
+ vq->last_avail_idx += num_buffers;
+ }
- vq->batch_copy_nb_elems = 0;
+ do_data_copy_enqueue(dev, vq);
- rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ uint32_t pkt_idx = 0;
+ uint16_t num_buffers;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
- vq->shadow_used_idx = 0;
- avail_head = *((volatile uint16_t *)&vq->avail->idx);
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
+ uint16_t nr_descs = 0;
- if (unlikely(reserve_avail_buf_mergeable(dev, vq,
- pkt_len, buf_vec, &num_buffers,
- avail_head) < 0)) {
- LOG_DEBUG(VHOST_DATA,
+ if (unlikely(reserve_avail_buf_packed(dev, vq,
+ pkt_len, buf_vec, &nr_vec,
+ &num_buffers, &nr_descs) < 0)) {
+ VHOST_LOG_DEBUG(VHOST_DATA,
"(%d) failed to get enough desc from vring\n",
dev->vid);
vq->shadow_used_idx -= num_buffers;
break;
}
- LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
- if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx],
- buf_vec, num_buffers) < 0) {
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
- vq->last_avail_idx += num_buffers;
+ vq->last_avail_idx += nr_descs;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
do_data_copy_enqueue(dev, vq);
if (likely(vq->shadow_used_idx)) {
- flush_shadow_used_ring(dev, vq);
- vhost_vring_call(dev, vq);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
+ return pkt_idx;
+}
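
Editor's note: both enqueue paths above advance vq->last_avail_idx by the number of descriptors consumed; the packed-ring variant additionally flips vq->avail_wrap_counter whenever the index wraps past vq->size. A minimal sketch of how descriptor availability is typically tested against that wrap counter on a packed ring follows; the flag bits follow the virtio 1.1 convention and the helper name is illustrative, not part of this patch.

	#include <stdint.h>

	/* Sketch only: bit 7 (AVAIL) and bit 15 (USED) of the descriptor
	 * flags, per the virtio 1.1 packed-ring layout. */
	#define SKETCH_DESC_F_AVAIL	(1 << 7)
	#define SKETCH_DESC_F_USED	(1 << 15)

	static inline int
	sketch_desc_is_avail(uint16_t flags, int wrap_counter)
	{
		int avail = !!(flags & SKETCH_DESC_F_AVAIL);
		int used  = !!(flags & SKETCH_DESC_F_USED);

		/* A descriptor is available to the device when AVAIL matches
		 * the driver's wrap counter and USED does not. */
		return avail == wrap_counter && used != wrap_counter;
	}
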
+
+static __rte_always_inline uint32_t
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ struct vhost_virtqueue *vq;
+
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ rte_spinlock_lock(&vq->access_lock);
+
+ if (unlikely(vq->enabled == 0))
+ goto out_access_unlock;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
+ count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+ if (count == 0)
+ goto out;
+
+ if (vq_is_packed(dev))
+ count = virtio_dev_rx_packed(dev, vq, pkts, count);
+ else
+ count = virtio_dev_rx_split(dev, vq, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -691,7 +926,7 @@ out:
out_access_unlock:
rte_spinlock_unlock(&vq->access_lock);
- return pkt_idx;
+ return count;
}
uint16_t
@@ -710,10 +945,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return 0;
}
- if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
- return virtio_dev_merge_rx(dev, queue_id, pkts, count);
- else
- return virtio_dev_rx(dev, queue_id, pkts, count);
+ return virtio_dev_rx(dev, queue_id, pkts, count);
}
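
Editor's note: with the merged and non-merged RX paths unified behind virtio_dev_rx(), the public API is unchanged and callers keep using rte_vhost_enqueue_burst() as before. A minimal caller-side sketch follows; the device id, the queue index (0) and the decision to free the mbufs right after enqueue are illustrative, not taken from this patch.

	#include <rte_mbuf.h>
	#include <rte_vhost.h>

	/* Sketch only: enqueue a burst to RX queue 0 of vhost device 'vid'. */
	static uint16_t
	sketch_enqueue_burst(int vid, struct rte_mbuf **pkts, uint16_t nb_pkts)
	{
		uint16_t sent = rte_vhost_enqueue_burst(vid, 0, pkts, nb_pkts);
		uint16_t i;

		/* Enqueue copies packet data into the guest's buffers, so the
		 * caller keeps ownership of the mbufs and frees them here. */
		for (i = 0; i < nb_pkts; i++)
			rte_pktmbuf_free(pkts[i]);

		return sent;
	}
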
static inline bool
@@ -838,42 +1070,62 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, uint16_t max_desc,
- struct rte_mbuf *m, uint16_t desc_idx,
- struct rte_mempool *mbuf_pool)
+ struct buf_vector *buf_vec, uint16_t nr_vec,
+ struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
{
- struct vring_desc *desc;
- uint64_t desc_addr;
- uint32_t desc_avail, desc_offset;
+ uint32_t buf_avail, buf_offset;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
+ struct virtio_net_hdr tmp_hdr;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
- uint32_t nr_desc = 1;
+ uint16_t vec_idx = 0;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
int error = 0;
- desc = &descs[desc_idx];
- if (unlikely((desc->len < dev->vhost_hlen)) ||
- (desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc->addr,
- desc->len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
+ if (likely(nr_vec > 1))
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
if (virtio_net_with_host_offload(dev)) {
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
- rte_prefetch0(hdr);
+ if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
+ uint64_t len;
+ uint64_t remain = sizeof(struct virtio_net_hdr);
+ uint64_t src;
+ uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
+ uint16_t hdr_vec_idx = 0;
+
+ /*
+ * No luck, the virtio-net header doesn't fit
+ * in a contiguous virtual area.
+ */
+ while (remain) {
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ src = buf_vec[hdr_vec_idx].buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ hdr_vec_idx++;
+ }
+
+ hdr = &tmp_hdr;
+ } else {
+ hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
+ rte_prefetch0(hdr);
+ }
}
/*
@@ -881,41 +1133,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* for Tx: the first for storing the header, and others
* for storing the data.
*/
- if (likely((desc->len == dev->vhost_hlen) &&
- (desc->flags & VRING_DESC_F_NEXT) != 0)) {
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
-
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc->addr,
- desc->len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
+ } else if (buf_len == dev->vhost_hlen) {
+ if (unlikely(++vec_idx >= nr_vec))
goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_offset = 0;
- desc_avail = desc->len;
- nr_desc += 1;
+ buf_offset = 0;
+ buf_avail = buf_len;
} else {
- desc_avail = desc->len - dev->vhost_hlen;
- desc_offset = dev->vhost_hlen;
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
}
- rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+ rte_prefetch0((void *)(uintptr_t)
+ (buf_addr + buf_offset));
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+ PRINT_PACKET(dev,
+ (uintptr_t)(buf_addr + buf_offset),
+ (uint32_t)buf_avail, 0);
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
uint64_t hpa;
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/*
* A desc buf might across two host physical pages that are
@@ -923,11 +1174,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* will be copied even though zero copy is enabled.
*/
if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
- desc->addr + desc_offset, cpy_len)))) {
+ buf_iova + buf_offset, cpy_len)))) {
cur->data_len = cpy_len;
cur->data_off = 0;
- cur->buf_addr = (void *)(uintptr_t)(desc_addr
- + desc_offset);
+ cur->buf_addr =
+ (void *)(uintptr_t)(buf_addr + buf_offset);
cur->buf_iova = hpa;
/*
@@ -937,61 +1188,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
mbuf_avail = cpy_len;
} else {
if (likely(cpy_len > MAX_BATCH_LEN ||
- copy_nb >= vq->size ||
+ vq->batch_copy_nb_elems >= vq->size ||
(hdr && cur == m))) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset),
- (void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset)),
cpy_len);
} else {
- batch_copy[copy_nb].dst =
+ batch_copy[vq->batch_copy_nb_elems].dst =
rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset);
- batch_copy[copy_nb].src =
- (void *)((uintptr_t)(desc_addr +
- desc_offset));
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].src =
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].len =
+ cpy_len;
+ vq->batch_copy_nb_elems++;
}
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
- /* This desc reaches to its end, get the next one */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+ /* This buf reaches to its end, get the next one */
+ if (buf_avail == 0) {
+ if (++vec_idx >= nr_vec)
break;
- if (unlikely(desc->next >= max_desc ||
- ++nr_desc > max_desc)) {
- error = -1;
- goto out;
- }
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
-
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc->addr,
- desc->len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
+ /*
+	 * Prefetch desc n + 1 buffer while
+ * desc n buffer is processed.
+ */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
- desc_offset = 0;
- desc_avail = desc->len;
+ buf_offset = 0;
+ buf_avail = buf_len;
- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+ PRINT_PACKET(dev, (uintptr_t)buf_addr,
+ (uint32_t)buf_avail, 0);
}
/*
@@ -1027,38 +1270,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
vhost_dequeue_offload(hdr, m);
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
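
Editor's note: the rewritten copy_desc_to_mbuf() no longer mirrors a local copy_nb counter back into the queue at the end; small copies are queued straight into vq->batch_copy_elems and drained later by the do_data_copy_*() helpers. Based only on the fields visible in this hunk (dst, src, len, batch_copy_nb_elems), such a drain loop amounts to roughly the sketch below; the real helpers are not part of this diff and also handle dirty-page logging via log_addr, which is omitted here.

	/* Sketch only: drain the deferred small copies queued by the
	 * enqueue/dequeue paths above. */
	static inline void
	sketch_drain_batch_copies(struct vhost_virtqueue *vq)
	{
		struct batch_copy_elem *elem = vq->batch_copy_elems;
		uint16_t i;

		for (i = 0; i < vq->batch_copy_nb_elems; i++)
			rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);

		vq->batch_copy_nb_elems = 0;
	}
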
-static __rte_always_inline void
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t used_idx, uint32_t desc_idx)
-{
- vq->used->ring[used_idx].id = desc_idx;
- vq->used->ring[used_idx].len = 0;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
-}
-
-static __rte_always_inline void
-update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t count)
-{
- if (unlikely(count == 0))
- return;
-
- rte_smp_wmb();
- rte_smp_rmb();
-
- vq->used->idx += count;
- vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
- vhost_vring_call(dev, vq);
-}
-
static __rte_always_inline struct zcopy_mbuf *
get_zmbuf(struct vhost_virtqueue *vq)
{
@@ -1118,66 +1333,137 @@ restore_mbuf(struct rte_mbuf *m)
}
}
-uint16_t
-rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
- struct virtio_net *dev;
- struct rte_mbuf *rarp_mbuf = NULL;
- struct vhost_virtqueue *vq;
- uint32_t desc_indexes[MAX_PKT_BURST];
- uint32_t used_idx;
- uint32_t i = 0;
+ uint16_t i;
uint16_t free_entries;
- uint16_t avail_idx;
- dev = get_device(vid);
- if (!dev)
- return 0;
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf, *next;
- if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
- RTE_LOG(ERR, VHOST_DATA,
- "(%d) %s: built-in vhost net backend is disabled.\n",
- dev->vid, __func__);
- return 0;
- }
+ for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+ zmbuf != NULL; zmbuf = next) {
+ next = TAILQ_NEXT(zmbuf, next);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
+ if (mbuf_is_consumed(zmbuf->mbuf)) {
+ update_shadow_used_ring_split(vq,
+ zmbuf->desc_idx, 0);
+ TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+ restore_mbuf(zmbuf->mbuf);
+ rte_pktmbuf_free(zmbuf->mbuf);
+ put_zmbuf(zmbuf);
+ vq->nr_zmbuf -= 1;
+ }
+ }
+
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
}
- vq = dev->virtqueue[queue_id];
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
- if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+ vq->last_avail_idx;
+ if (free_entries == 0)
return 0;
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- vq->batch_copy_nb_elems = 0;
+ count = RTE_MIN(count, MAX_PKT_BURST);
+ count = RTE_MIN(count, free_entries);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+ dev->vid, count);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ for (i = 0; i < count; i++) {
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t head_idx, dummy_len;
+ uint16_t nr_vec = 0;
+ int err;
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (unlikely(fill_vec_buf_split(dev, vq,
+ vq->last_avail_idx + i,
+ &nr_vec, buf_vec,
+ &head_idx, &dummy_len,
+ VHOST_ACCESS_RO) < 0))
+ break;
+
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_split(vq, head_idx, 0);
+
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+ pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(pkts[i] == NULL)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to allocate memory for mbuf.\n");
+ break;
+ }
+
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
+ if (unlikely(err)) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf;
+
+ zmbuf = get_zmbuf(vq);
+ if (!zmbuf) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+ zmbuf->mbuf = pkts[i];
+ zmbuf->desc_idx = head_idx;
+
+ /*
+ * Pin lock the mbuf; we will check later to see
+ * whether the mbuf is freed (when we are the last
+ * user) or not. If that's the case, we then could
+			 * user) or not. If that's the case, we could then
+ */
+ rte_mbuf_refcnt_update(pkts[i], 1);
+
+ vq->nr_zmbuf += 1;
+ TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+ }
+ }
+ vq->last_avail_idx += i;
+
+ if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return i;
+}
+
+static __rte_always_inline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ uint16_t i;
+
+ rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
- int nr_updated = 0;
for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
zmbuf != NULL; zmbuf = next) {
next = TAILQ_NEXT(zmbuf, next);
if (mbuf_is_consumed(zmbuf->mbuf)) {
- used_idx = vq->last_used_idx++ & (vq->size - 1);
- update_used_ring(dev, vq, used_idx,
- zmbuf->desc_idx);
- nr_updated += 1;
+ update_shadow_used_ring_packed(vq,
+ zmbuf->desc_idx,
+ 0,
+ zmbuf->desc_count);
TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
restore_mbuf(zmbuf->mbuf);
@@ -1187,93 +1473,34 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
}
}
- update_used_idx(dev, vq, nr_updated);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
- /*
- * Construct a RARP broadcast packet, and inject it to the "pkts"
- * array, to looks like that guest actually send such packet.
- *
- * Check user_send_rarp() for more information.
- *
- * broadcast_rarp shares a cacheline in the virtio_net structure
- * with some fields that are accessed during enqueue and
- * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
- * result in false sharing between enqueue and dequeue.
- *
- * Prevent unnecessary false sharing by reading broadcast_rarp first
- * and only performing cmpset if the read indicates it is likely to
- * be set.
- */
-
- if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
- rte_atomic16_cmpset((volatile uint16_t *)
- &dev->broadcast_rarp.cnt, 1, 0))) {
-
- rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
- if (rarp_mbuf == NULL) {
- RTE_LOG(ERR, VHOST_DATA,
- "Failed to make RARP packet.\n");
- return 0;
- }
- count -= 1;
- }
-
- free_entries = *((volatile uint16_t *)&vq->avail->idx) -
- vq->last_avail_idx;
- if (free_entries == 0)
- goto out;
-
- LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-
- /* Prefetch available and used ring */
- avail_idx = vq->last_avail_idx & (vq->size - 1);
- used_idx = vq->last_used_idx & (vq->size - 1);
- rte_prefetch0(&vq->avail->ring[avail_idx]);
- rte_prefetch0(&vq->used->ring[used_idx]);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
count = RTE_MIN(count, MAX_PKT_BURST);
- count = RTE_MIN(count, free_entries);
- LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
dev->vid, count);
- /* Retrieve all of the head indexes first to avoid caching issues. */
- for (i = 0; i < count; i++) {
- avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
- used_idx = (vq->last_used_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[avail_idx];
-
- if (likely(dev->dequeue_zero_copy == 0))
- update_used_ring(dev, vq, used_idx, desc_indexes[i]);
- }
-
- /* Prefetch descriptor index. */
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
- struct vring_desc *desc;
- uint16_t sz, idx;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t buf_id, dummy_len;
+ uint16_t desc_count, nr_vec = 0;
int err;
- if (likely(i + 1 < count))
- rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ vq->last_avail_idx, &desc_count,
+ buf_vec, &nr_vec,
+ &buf_id, &dummy_len,
+ VHOST_ACCESS_RW) < 0))
+ break;
- if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
- desc = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev, vq,
- vq->desc[desc_indexes[i]].addr,
- sizeof(*desc),
- VHOST_ACCESS_RO);
- if (unlikely(!desc))
- break;
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_packed(vq, buf_id, 0,
+ desc_count);
- rte_prefetch0(desc);
- sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
- idx = 0;
- } else {
- desc = vq->desc;
- sz = vq->size;
- idx = desc_indexes[i];
- }
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
@@ -1282,8 +1509,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
break;
}
- err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
- mbuf_pool);
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
break;
@@ -1298,7 +1525,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
break;
}
zmbuf->mbuf = pkts[i];
- zmbuf->desc_idx = desc_indexes[i];
+ zmbuf->desc_idx = buf_id;
+ zmbuf->desc_count = desc_count;
/*
* Pin lock the mbuf; we will check later to see
@@ -1311,15 +1539,103 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq->nr_zmbuf += 1;
TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
}
+
+ vq->last_avail_idx += desc_count;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
- vq->last_avail_idx += i;
if (likely(dev->dequeue_zero_copy == 0)) {
do_data_copy_dequeue(vq);
- vq->last_used_idx += i;
- update_used_idx(dev, vq, i);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
+ }
+
+ return i;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev;
+ struct rte_mbuf *rarp_mbuf = NULL;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return 0;
+ }
+
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ return 0;
+
+ if (unlikely(vq->enabled == 0)) {
+ count = 0;
+ goto out_access_unlock;
+ }
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+
+ /*
+ * Construct a RARP broadcast packet, and inject it to the "pkts"
+	 * array, to make it look like the guest actually sent such a packet.
+ *
+ * Check user_send_rarp() for more information.
+ *
+ * broadcast_rarp shares a cacheline in the virtio_net structure
+ * with some fields that are accessed during enqueue and
+ * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+ * result in false sharing between enqueue and dequeue.
+ *
+ * Prevent unnecessary false sharing by reading broadcast_rarp first
+ * and only performing cmpset if the read indicates it is likely to
+ * be set.
+ */
+ if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+ rte_atomic16_cmpset((volatile uint16_t *)
+ &dev->broadcast_rarp.cnt, 1, 0))) {
+
+ rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+ if (rarp_mbuf == NULL) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to make RARP packet.\n");
+ count = 0;
+ goto out;
+ }
+ count -= 1;
}
+ if (vq_is_packed(dev))
+ count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
+ else
+ count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -1332,10 +1648,10 @@ out_access_unlock:
* Inject it to the head of "pkts" array, so that switch's mac
* learning table will get updated first.
*/
- memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+ memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
pkts[0] = rarp_mbuf;
- i += 1;
+ count += 1;
}
- return i;
+ return count;
}
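
Editor's note: rte_vhost_dequeue_burst() keeps its public signature; only the dispatch between the split and packed ring layouts is new. A minimal caller-side sketch follows; the queue index (1, the guest TX queue in the usual queue-pair layout) and the burst size are illustrative, not taken from this patch.

	#include <rte_mbuf.h>
	#include <rte_mempool.h>
	#include <rte_vhost.h>

	#define SKETCH_BURST 32

	/* Sketch only: pull up to SKETCH_BURST packets sent by the guest on
	 * queue 1 of device 'vid', allocating mbufs from mempool 'mp'. */
	static uint16_t
	sketch_dequeue_burst(int vid, struct rte_mempool *mp,
			struct rte_mbuf **pkts)
	{
		return rte_vhost_dequeue_burst(vid, 1, mp, pkts, SKETCH_BURST);
	}
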
diff --git a/lib/meson.build b/lib/meson.build
index ef615917..eb91f100 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -9,22 +9,27 @@
# given as a dep, no need to mention ring. This is especially true for the
# core libs which are widely reused, so their deps are kept to a minimum.
libraries = [ 'compat', # just a header, used for versioning
- 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ether', 'pci', # core
+ 'kvargs',
+ 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core
'metrics', # bitrate/latency stats depends on this
'hash', # efd depends on this
- 'kvargs', # cryptodev depends on this
+ 'timer', # eventdev depends on this
'acl', 'bbdev', 'bitratestats', 'cfgfile',
- 'cmdline', 'cryptodev',
+ 'cmdline', 'compressdev', 'cryptodev',
'distributor', 'efd', 'eventdev',
'gro', 'gso', 'ip_frag', 'jobstats',
'kni', 'latencystats', 'lpm', 'member',
- 'meter', 'power', 'pdump',
- 'reorder', 'sched', 'security', 'timer', 'vhost',
+ 'meter', 'power', 'pdump', 'rawdev',
+ 'reorder', 'sched', 'security', 'vhost',
# add pkt framework libs which use other libs from above
'port', 'table', 'pipeline',
# flow_classify lib depends on pkt framework table lib
- 'flow_classify']
+ 'flow_classify', 'bpf']
+default_cflags = machine_args
+if cc.has_argument('-Wno-format-truncation')
+ default_cflags += '-Wno-format-truncation'
+endif
foreach l:libraries
build = true
name = l
@@ -33,7 +38,7 @@ foreach l:libraries
sources = []
headers = []
includes = []
- cflags = machine_args
+ cflags = default_cflags
objs = [] # other object files to link against, used e.g. for
# instruction-set optimized versions of code
@@ -41,9 +46,12 @@ foreach l:libraries
# external package/library requirements
ext_deps = []
deps = ['eal'] # eal is standard dependency except for itself
- if l == 'eal'
+ if l == 'kvargs'
deps = []
endif
+ if l == 'eal'
+ deps = ['kvargs']
+ endif
dir_name = 'librte_' + l
subdir(dir_name)
@@ -63,6 +71,10 @@ foreach l:libraries
shared_deps = ext_deps
static_deps = ext_deps
foreach d:deps
+ if not is_variable('shared_rte_' + d)
+ error('Missing dependency ' + d +
+ ' for library ' + lib_name)
+ endif
shared_deps += [get_variable('shared_rte_' + d)]
static_deps += [get_variable('static_rte_' + d)]
endforeach
@@ -95,7 +107,7 @@ foreach l:libraries
# then use pre-build objects to build shared lib
sources = []
- objs += static_lib.extract_all_objects()
+ objs += static_lib.extract_all_objects(recursive: false)
version_map = '@0@/@1@/rte_@2@_version.map'.format(
meson.current_source_dir(), dir_name, name)
shared_lib = shared_library(libname,